1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00
Files
mariadb-columnstore-engine/procmgr/processmanager.cpp
david hill 5c34be958b mcol-943
2017-10-19 15:48:39 -05:00

10339 lines
295 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/******************************************************************************************
* $Id: processmanager.cpp 2216 2013-08-13 14:34:10Z dhill $
*
******************************************************************************************/
//#define NDEBUG
#include <cassert>
#include "processmanager.h"
#include "installdir.h"
#include "dbrm.h"
#include "cacheutils.h"
#include "ddlcleanuputil.h"
using namespace cacheutils;
using namespace std;
using namespace processmanager;
using namespace messageqcpp;
using namespace oam;
using namespace logging;
using namespace alarmmanager;
using namespace config;
// Process-wide mutexes.
// NOTE(review): their initialization and the data they protect are not
// visible in the reviewed portion of this file -- confirm before relying on them.
pthread_mutex_t STATUS_LOCK;
pthread_mutex_t THREAD_LOCK;
// Settings defined in another translation unit and only declared here.
// Only the ones with a comment are referenced in the reviewed portion of the file.
extern string cloud;
extern bool amazon;
extern bool runStandby;
extern string iface_name;
extern string PMInstanceType;
extern string UMInstanceType;
extern string GlusterConfig;
extern bool rootUser;
extern string USER;
// Checked by the STOPSYSTEM and SHUTDOWNSYSTEM handlers in processMSG, which
// take a separate code path when it is true.
extern bool HDFS;
// Passed to "pdsh -x" in the command built by the SHUTDOWNSYSTEM handler.
extern string localHostName;
extern string PMwithUM;
extern string AmazonPMFailover;
extern string DBRootStorageType;
extern int requestCount;
typedef map<string, int> moduleList;
extern moduleList moduleInfoList;
// Written by the Configuration constructor with element 4 of the tuple
// returned by Oam::getModuleInfo().
bool gOAMParentModuleFlag;
// Module list rebuilt by the STARTMODULE / RESTARTMODULE handlers in
// processMSG and handed (by address) to startSystemThread.
oam::DeviceNetworkList startdevicenetworklist;
// Per-thread result codes, all starting out as API_SUCCESS. processMSG reads
// startsystemthreadStatus after joining the start-system thread.
// NOTE(review): the writers of these values are outside the reviewed portion.
int upgradethreadStatus = oam::API_SUCCESS;
int startsystemthreadStatus = oam::API_SUCCESS;
int stopsystemthreadStatus = oam::API_SUCCESS;
int startmodulethreadStatus = oam::API_SUCCESS;
// Set to true by the STOPSYSTEM handler so that running start-system threads
// exit before the stop is done; cleared again by STARTMODULE / RESTARTMODULE
// and at the end of STOPSYSTEM.
bool startsystemthreadStop = false;
// When true, the STOPSYSTEM handler sleeps 5 seconds before continuing.
bool startsystemthreadRunning = false;
// NOTE(review): the globals below are not referenced in the reviewed portion
// of the file; presumably failover / replication / heartbeat bookkeeping -- confirm.
string gdownActiveOAMModule;
vector<string> downModuleList;
bool startFailOver = false;
string masterLogFile = oam::UnassignedName;
string masterLogPos = oam::UnassignedName;
HeartBeatProcList hbproclist;
namespace processmanager{
/******************************************************************************************
* @brief Configuration Constructor
*
* purpose: Configuration Constructor
*
******************************************************************************************/
Configuration::Configuration()
{
    // Ask OAM once for the local module description and cache every element
    // of the returned tuple in the matching member. The parent-module flag is
    // additionally mirrored into the file-scope gOAMParentModuleFlag.
    Oam oamApi;

    try
    {
        oamModuleInfo_t moduleInfo = oamApi.getModuleInfo();

        flocalModuleName      = boost::get<0>(moduleInfo);
        flocalModuleType      = boost::get<1>(moduleInfo);
        flocalModuleID        = boost::get<2>(moduleInfo);
        fOAMParentModuleName  = boost::get<3>(moduleInfo);
        fOAMParentModuleFlag  = boost::get<4>(moduleInfo);
        gOAMParentModuleFlag  = boost::get<4>(moduleInfo);
        fserverInstallType    = boost::get<5>(moduleInfo);
        fOAMStandbyModuleName = boost::get<6>(moduleInfo);
        fOAMStandbyModuleFlag = boost::get<7>(moduleInfo);
    }
    catch (const exception& err)
    {
        // Without module information the object cannot be built: report the
        // reason on stdout and terminate the process.
        cout << endl << "ProcMgr Construct Error = " << err.what() << endl;
        exit(-1);
    }
}
/******************************************************************************************
* @brief Configuration Destructor
*
* purpose: Configuration Destructor
*
******************************************************************************************/
Configuration::~Configuration()
{
    // Intentionally empty: this destructor performs no explicit cleanup.
}
/******************************************************************************************
* @brief getstateInfo
*
* purpose: Return the module opstate tag
*
******************************************************************************************/
string Configuration::getstateInfo(string moduleName)
{
    // Look the module up in stateInfoList with operator[] and hand back a
    // copy of the entry.
    // NOTE(review): if stateInfoList is a std::map, operator[] inserts a
    // default-constructed entry for an unknown module name -- confirm that
    // this is acceptable for callers.
    string stateTag = stateInfoList[moduleName];
    return stateTag;
}
/******************************************************************************************
* @brief ProcessLog Constructor
*
* purpose: ProcessLog Constructor
*
******************************************************************************************/
ProcessLog::ProcessLog()
{
    // Intentionally empty: this constructor performs no explicit setup.
}
/******************************************************************************************
* @brief ProcessLog Destructor
*
* purpose: ProcessLog Destructor
*
******************************************************************************************/
ProcessLog::~ProcessLog()
{
    // Intentionally empty: this destructor performs no explicit cleanup.
}
/******************************************************************************************
* @brief writeLog
*
* purpose: Write the message to the log
*
******************************************************************************************/
void ProcessLog::writeLog(const int lineNumber, const string logContent, const LOG_TYPE logType)
{
LoggingID lid(17);
MessageLog ml(lid);
Message msg;
Message::Args args;
if (logType == LOG_TYPE_ERROR)
{
args.add("line:");
args.add(lineNumber);
}
args.add(logContent);
msg.format(args);
switch(logType) {
case LOG_TYPE_DEBUG:
try {
ml.logDebugMessage(msg);
}
catch(...) {}
break;
case LOG_TYPE_INFO:
try {
ml.logInfoMessage(msg);
}
catch(...) {}
break;
case LOG_TYPE_WARNING:
try {
ml.logWarningMessage(msg);
}
catch(...) {}
break;
case LOG_TYPE_ERROR:
try {
ml.logErrorMessage(msg);
}
catch(...) {}
break;
case LOG_TYPE_CRITICAL:
try {
ml.logCriticalMessage(msg);
}
catch(...) {}
break;
}
return;
}
/******************************************************************************************
* @brief writeLog
*
* purpose: Write the message to the log
*
******************************************************************************************/
void ProcessLog::writeLog(const int lineNumber, const int logContent, const LOG_TYPE logType)
{
    // Integer flavour of writeLog: formats logContent into a Message and
    // sends it to the MessageLog at the severity selected by logType.
    //   lineNumber - source line of the caller; only emitted for error entries
    //   logContent - numeric value to log
    //   logType    - severity; a value outside the five handled cases logs nothing
    LoggingID lid(17);
    MessageLog ml(lid);
    Message msg;
    Message::Args args;

    // The line prefix has to be part of the argument list BEFORE the message
    // is formatted. It used to be appended after msg.format() had already
    // run, so error entries never carried their line number. The layout now
    // matches the string overload: "line:", <lineNumber>, <content>.
    if (logType == LOG_TYPE_ERROR)
    {
        args.add("line:");
        args.add(lineNumber);
    }

    args.add(logContent);
    msg.format(args);

    // Same best-effort policy as the string overload: a failure inside the
    // logger must not propagate into the caller.
    try
    {
        switch (logType)
        {
            case LOG_TYPE_DEBUG:
                ml.logDebugMessage(msg);
                break;

            case LOG_TYPE_INFO:
                ml.logInfoMessage(msg);
                break;

            case LOG_TYPE_WARNING:
                ml.logWarningMessage(msg);
                break;

            case LOG_TYPE_ERROR:
                ml.logErrorMessage(msg);
                break;

            case LOG_TYPE_CRITICAL:
                ml.logCriticalMessage(msg);
                break;
        }
    }
    catch (...)
    {
    }
}
/******************************************************************************************
* @brief setSysLogData
*
* purpose: Placeholder for setting the syslog data; the current implementation does nothing
*
******************************************************************************************/
void ProcessLog::setSysLogData()
{
    // No-op: nothing is stored or changed by this call.
}
/******************************************************************************************
* @brief getSysLogData
*
* purpose: return the sysLogData (the current implementation always returns an empty string)
*
******************************************************************************************/
string ProcessLog::getSysLogData()
{
    // Always yields an empty string; no stored state is consulted.
    return string();
}
/******************************************************************************************
* @brief writeSystemLog
*
* purpose: log process status change into system log (not implemented; the body is currently empty)
*
******************************************************************************************/
void ProcessLog::writeSystemLog()
{
    // No-op: the body is empty, so calling this writes nothing.
}
/******************************************************************************************
* @brief ProcessManager Constructor
*
* purpose: ProcessManager Constructor
*
******************************************************************************************/
ProcessManager::ProcessManager(Configuration &aconfig, ProcessLog &alog)
    : config(aconfig),
      log(alog)
{
    // The members config and log are initialised from the two arguments;
    // no further setup is performed.
}
/******************************************************************************************
* @brief ProcessManager Destructor
*
* purpose: ProcessManager Destructor
*
******************************************************************************************/
ProcessManager::~ProcessManager()
{
    // Intentionally empty: this destructor performs no explicit cleanup.
}
/******************************************************************************************
* @brief processMSG
*
* purpose: Process the received message
*
******************************************************************************************/
//void ProcessManager::processMSG( messageqcpp::IOSocket fIos, messageqcpp::ByteStream msg)
void processMSG(messageqcpp::IOSocket* cfIos)
{
messageqcpp::IOSocket fIos = *cfIos;
pthread_t ThreadId;
ThreadId = pthread_self();
ByteStream msg;
try{
msg = fIos.read();
}
catch(...)
{
pthread_detach (ThreadId);
pthread_exit(0);
}
if (msg.length() <= 0) {
fIos.close();
pthread_detach (ThreadId);
pthread_exit(0);
}
ByteStream::byte msgType;
msg >> msgType;
Oam oam;
ProcessLog log;
// log.writeLog(__LINE__, "** processMSG msg type: " + oam.itoa(msgType), LOG_TYPE_DEBUG);
Configuration config;
ProcessManager processManager(config, log);
ByteStream::byte actionType;
string target;
ByteStream::byte graceful;
ByteStream::byte ackIndicator = 0;
ByteStream::byte manualFlag;
ByteStream ackMsg;
ByteStream::byte status = 0;
ALARMManager aManager;
SystemModuleTypeConfig systemmoduletypeconfig;
SystemProcessConfig systemprocessconfig;
try{
oam.getSystemConfig(systemmoduletypeconfig);
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
switch (msgType) {
case REQUEST:
msg >> actionType;
msg >> target;
msg >> graceful;
msg >> ackIndicator;
msg >> manualFlag;
switch (actionType) {
case STOPMODULE:
{
uint16_t count, hostConfigCount;
string value;
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
//get module count to remove
msg >> count;
if ( count > 0 ) {
for (int i = 0; i < count; i++)
{
msg >> value;
devicenetworkconfig.DeviceName = value;
msg >> value;
devicenetworkconfig.UserTempDeviceName = value;
msg >> value;
devicenetworkconfig.DisableState = value;
devicenetworklist.push_back(devicenetworkconfig);
msg >> hostConfigCount;
}
string password;
msg >> password;
DeviceNetworkList::iterator listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string moduleName = (*listPT).DeviceName;
log.writeLog(__LINE__, "MSG RECEIVED: Stop Module request on " + moduleName );
string moduletype = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
status = API_SUCCESS;
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus(moduleName, opState, degraded);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) {
status = API_DISABLED;
log.writeLog(__LINE__, "Stop Module requested Ignored on a Disabled " + moduleName);
}
else {
status = processManager.stopModule(moduleName, graceful, manualFlag);
log.writeLog(__LINE__, "Stop Module Completed on " + moduleName, LOG_TYPE_INFO);
Configuration config;
if ( moduleName == config.OAMStandbyName() ) {
string newStandbyModule = processManager.getStandbyModule();
if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
processManager.setStandbyModule(newStandbyModule);
else
{
Config* sysConfig = Config::makeConfig();
// clear Standby OAM Module
sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);
//update Columnstore Config table
try {
sysConfig->write();
}
catch(...)
{
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
}
}
}
}
}
}
else
{
status = oam::API_INVALID_PARAMETER;
log.writeLog(__LINE__, "STOPMODULE: Module Count invalid = " + oam.itoa(count));
}
log.writeLog(__LINE__, "STOPMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "STOPMODULE: ACK back to sender");
}
break;
}
case SHUTDOWNMODULE:
{
uint16_t count, hostConfigCount;
string value;
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
//get module count to remove
msg >> count;
if ( count > 0 ) {
for (int i = 0; i < count; i++)
{
msg >> value;
devicenetworkconfig.DeviceName = value;
msg >> value;
devicenetworkconfig.UserTempDeviceName = value;
msg >> value;
devicenetworkconfig.DisableState = value;
devicenetworklist.push_back(devicenetworkconfig);
msg >> hostConfigCount;
}
string password;
msg >> password;
DeviceNetworkList::iterator listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string moduleName = (*listPT).DeviceName;
log.writeLog(__LINE__, "MSG RECEIVED: Shutdown Module request on " + moduleName );
status = API_SUCCESS;
log.writeLog(__LINE__, "Shutdown Module Requested on " + moduleName, LOG_TYPE_INFO);
processManager.shutdownModule(moduleName, graceful, manualFlag, 0);
//check for SIMPLEX Processes on mate might need to be started
processManager.checkSimplexModule(moduleName);
Configuration config;
if ( moduleName == config.OAMStandbyName() ) {
string newStandbyModule = processManager.getStandbyModule();
if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
processManager.setStandbyModule(newStandbyModule);
}
}
}
else
{
status = oam::API_INVALID_PARAMETER;
log.writeLog(__LINE__, "SHUTDOWNMODULE: Module Count invalid = " + oam.itoa(count));
}
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "SHUTDOWNMODULE: ACK back to sender, return status = " + oam.itoa(status));
}
break;
}
case STARTMODULE:
{
log.writeLog(__LINE__, "MSG RECEIVED: Start Module request" );
startsystemthreadStop = false;
uint16_t count, hostConfigCount;
string value;
oam::DeviceNetworkConfig devicenetworkconfig;
startdevicenetworklist.clear();
//get module count to remove
msg >> count;
if ( count > 0 ) {
for (int i = 0; i < count; i++)
{
msg >> value;
devicenetworkconfig.DeviceName = value;
msg >> value;
devicenetworkconfig.UserTempDeviceName = value;
msg >> value;
devicenetworkconfig.DisableState = value;
startdevicenetworklist.push_back(devicenetworkconfig);
msg >> hostConfigCount;
}
string password;
msg >> password;
pthread_t startsystemthread;
status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &startdevicenetworklist);
if ( status != 0 ) {
log.writeLog(__LINE__, "STARTMODULE: pthread_create failed, return status = " + oam.itoa(status));
status = API_FAILURE;
}
if (status == 0 && ackIndicator)
{
pthread_join(startsystemthread, NULL);
status = startsystemthreadStatus;
}
if( status == API_SUCCESS) {
//distribute config file
processManager.distributeConfigFile("system");
//call dbrm control
oam.dbrmctl("halt");
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
processManager.restartProcessType("ExeMgr");
//setup MySQL Replication for started modules
log.writeLog(__LINE__, "Setup MySQL Replication for module being started", LOG_TYPE_DEBUG);
processManager.setMySQLReplication(startdevicenetworklist);
}
}
else
{
status = oam::API_INVALID_PARAMETER;
log.writeLog(__LINE__, "STARTMODULE: Module Count invalid = " + oam.itoa(count));
}
log.writeLog(__LINE__, "STARTMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "STARTMODULE: ACK back to sender");
}
break;
}
case RESTARTMODULE:
{
uint16_t count, hostConfigCount;
string value;
oam::DeviceNetworkConfig devicenetworkconfig;
startdevicenetworklist.clear();
startsystemthreadStop = false;
//get module count to remove
msg >> count;
if ( count > 0 ) {
for (int i = 0; i < count; i++)
{
msg >> value;
devicenetworkconfig.DeviceName = value;
msg >> value;
devicenetworkconfig.UserTempDeviceName = value;
msg >> value;
devicenetworkconfig.DisableState = value;
startdevicenetworklist.push_back(devicenetworkconfig);
msg >> hostConfigCount;
}
string password;
msg >> password;
DeviceNetworkList::iterator listPT = startdevicenetworklist.begin();
for( ; listPT != startdevicenetworklist.end() ; listPT++)
{
string moduleName = (*listPT).DeviceName;
log.writeLog(__LINE__, "MSG RECEIVED: Restart Module request on " + moduleName );
status = API_SUCCESS;
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus(moduleName, opState, degraded);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
if (opState != oam::MAN_DISABLED) {
status = processManager.stopModule(moduleName, graceful, manualFlag);
log.writeLog(__LINE__, "Stop Module Completed on " + moduleName, LOG_TYPE_INFO);
Configuration config;
if ( moduleName == config.OAMStandbyName() ) {
string newStandbyModule = processManager.getStandbyModule();
if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
processManager.setStandbyModule(newStandbyModule);
}
}
else {
status = API_DISABLED;
log.writeLog(__LINE__, "Stop Module requested Ignored on a Disabled " + moduleName);
}
}
pthread_t startsystemthread;
status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &startdevicenetworklist);
if ( status != 0 ) {
log.writeLog(__LINE__, "RESTARTMODULE: pthread_create failed, return status = " + oam.itoa(status));
status = API_FAILURE;
}
if (status == 0 && ackIndicator)
{
pthread_join(startsystemthread, NULL);
status = startsystemthreadStatus;
}
if( status == API_SUCCESS) {
//distribute config file
processManager.distributeConfigFile("system");
processManager.restartProcessType("ExeMgr");
}
}
else
{
status = oam::API_INVALID_PARAMETER;
log.writeLog(__LINE__, "RESTARTMODULE: Module Count invalid = " + oam.itoa(count));
}
log.writeLog(__LINE__, "RESTARTMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << (ByteStream::byte) status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "RESTARTMODULE: ACK back to sender");
}
break;
}
case DISABLEMODULE:
{
uint16_t count, hostConfigCount;
string value;
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
//get module count to remove
msg >> count;
if ( count > 0 ) {
for (int i = 0; i < count; i++)
{
msg >> value;
devicenetworkconfig.DeviceName = value;
msg >> value;
devicenetworkconfig.UserTempDeviceName = value;
msg >> value;
devicenetworkconfig.DisableState = value;
devicenetworklist.push_back(devicenetworkconfig);
msg >> hostConfigCount;
}
string password;
msg >> password;
DeviceNetworkList::iterator listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string moduleName = (*listPT).DeviceName;
log.writeLog(__LINE__, "MSG RECEIVED: Disable Module request on " + moduleName );
// check module status, Disable module
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus(moduleName, opState, degraded);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
//don't allow disable of current Parent OAM Module
if ( moduleName == config.moduleName() )
{
log.writeLog(__LINE__, "ERROR: can't disable Parent OAM module", LOG_TYPE_ERROR);
status = API_INVALID_PARAMETER;
break;
}
if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED
|| opState == oam::AUTO_DISABLED ) {
oam.dbrmctl("halt");
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
status = processManager.disableModule(moduleName, true);
log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO);
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
//check for SIMPLEX Processes on mate might need to be started
processManager.checkSimplexModule(moduleName);
}
else
{
log.writeLog(__LINE__, "ERROR: module not stopped", LOG_TYPE_ERROR);
status = API_FAILURE;
break;
}
}
}
else
{
status = oam::API_INVALID_PARAMETER;
log.writeLog(__LINE__, "DISABLEMODULE: Module Count invalid = " + oam.itoa(count));
}
log.writeLog(__LINE__, "DISABLEMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "DISABLEMODULE: ACK back to sender");
}
break;
}
case ENABLEMODULE:
{
uint16_t count, hostConfigCount;
string value;
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
//get module count to remove
msg >> count;
if ( count > 0 ) {
for (int i = 0; i < count; i++)
{
msg >> value;
devicenetworkconfig.DeviceName = value;
msg >> value;
devicenetworkconfig.UserTempDeviceName = value;
msg >> value;
devicenetworkconfig.DisableState = value;
devicenetworklist.push_back(devicenetworkconfig);
msg >> hostConfigCount;
}
string password;
msg >> password;
DeviceNetworkList::iterator listPT = devicenetworklist.begin();
//stopModules being removed with the REMOVE option, which will stop process
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string moduleName = (*listPT).DeviceName;
log.writeLog(__LINE__, "MSG RECEIVED: Enable Module request on " + moduleName );
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus(moduleName, opState, degraded);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
if (opState == oam::MAN_DISABLED) {
status = processManager.enableModule(moduleName, oam::MAN_OFFLINE);
log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO);
}
else
{
log.writeLog(__LINE__, "ERROR: module name not Disabled", LOG_TYPE_ERROR);
status = API_INVALID_STATE;
break;
}
}
}
else
{
status = oam::API_INVALID_PARAMETER;
log.writeLog(__LINE__, "ENABLEMODULE: Module Count invalid = " + oam.itoa(count));
}
log.writeLog(__LINE__, "ENABLEMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "ENABLEMODULE: ACK back to sender");
}
break;
}
case STOPSYSTEM:
{
log.writeLog(__LINE__, "MSG RECEIVED: Stop System request..." );
// GRACEFUL_WAIT means that we are shutting down, but waiting for
// all transactions to finish or rollback as commanded. This is only set if
// there are, in fact, transactions active (or cpimport).
if (graceful == GRACEFUL_WAIT)
{
ByteStream stillWorkingMsg;
stillWorkingMsg << (ByteStream::byte) oam::ACK;
stillWorkingMsg << actionType;
stillWorkingMsg << target;
stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING;
// This wait can take a while. We wait for table locks to release and open transactions to commit.
if (oam.waitForSystem(STOPSYSTEM, fIos, stillWorkingMsg))
{
graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT.
// Send an ack back to say we're done waiting and are now shutting down.
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "STOPSYSTEM: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE));
}
else
{
// We've been cancelled.
if (ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) API_CANCELLED;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender (canceled)");
break;
}
}
}
//set the flag to have any startsystemthreads to exit out before stop is done
startsystemthreadStop = true;
if ( startsystemthreadRunning )
sleep(5);
//stop by process type first, if system is ACTIVE
SystemStatus systemstatus;
try {
oam.getSystemStatus(systemstatus);
}
catch(...)
{}
//set system status
processManager.setSystemState(oam::MAN_INIT);
if (HDFS)
{
oam::DeviceNetworkList devicenetworklist;
pthread_t stopsystemthread;
status = pthread_create (&stopsystemthread, NULL, (void*(*)(void*)) &stopSystemThread, &devicenetworklist);
if ( status != 0 ) {
log.writeLog(__LINE__, "STOPSYSTEMS: pthread_create failed, return status = " + oam.itoa(status));
status = API_FAILURE;
}
if (status == 0 && ackIndicator)
{
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) status;
try {
fIos.write(ackMsg);
}
catch(...) {}
log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender");
}
break;
}
//call to update module status and send notification message
for( unsigned int i = 0 ;i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
{
int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
if( moduleCount == 0)
continue;
DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
{
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus((*pt).DeviceName, opState, degraded);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
continue;
processManager.stopModule((*pt).DeviceName, STATUS_UPDATE, manualFlag, 0);
}
}
//set query system state not ready
processManager.setQuerySystemState(false);
if (systemstatus.SystemOpState == ACTIVE && graceful == oam::GRACEFUL)
processManager.stopProcessTypes(manualFlag);
//stop all of processes..
for( unsigned int i = 0 ;i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
{
int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
if( moduleCount == 0)
continue;
DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
{
//skip OAM Parent module, do at the end
if ( (*pt).DeviceName == config.moduleName() )
continue;
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus((*pt).DeviceName, opState, degraded);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
continue;
log.writeLog(__LINE__, "STOPSYSTEM: Request Stop Module on " + (*pt).DeviceName );
// int retStatus = processManager.stopModule((*pt).DeviceName, graceful, manualFlag, 0);
processManager.stopModule((*pt).DeviceName, graceful, manualFlag, 0);
// log.writeLog(__LINE__, "STOPSYSTEM: ACK received from Process-Monitor, return status = " + oam.itoa(status));
// if (retStatus != API_SUCCESS)
// status = retStatus;
}
}
//wait until all child modules are offline or A FAILURE HAS OCCURRED
bool failure = false;
bool stopped = true;
for ( int retry = 0 ; retry < 30 ; retry++ )
{
sleep(1);
stopped = true;
for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
{
int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
if ( moduleCount == 0)
continue;
DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
{
string moduleName = (*pt).DeviceName;
//skip OAM Parent module, do at the end
if ( moduleName == config.moduleName() )
continue;
int opState = oam::ACTIVE;
try
{
bool degraded;
oam.getModuleStatus(moduleName, opState, degraded);
if (opState == oam::FAILED) {
failure = true;
log.writeLog(__LINE__, "STOPSYSTEM: Failed, failure on module " + moduleName, LOG_TYPE_ERROR);
break;
}
if (opState == oam::MAN_OFFLINE ||
opState == oam::MAN_DISABLED ||
opState == oam::AUTO_DISABLED )
continue;
stopped = false;
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR);
}
catch (...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
}
if ( failure )
break;
}
if ( failure)
break;
if ( stopped )
break;
}
if ( failure )
{
processManager.setSystemState(oam::FAILED);
}
else
{
if ( !stopped)
{
//timeout waiting for system to stop, error out
log.writeLog(__LINE__, "STOPSYSTEM: Failed, timeout waiting for module to stop", LOG_TYPE_ERROR);
processManager.setSystemState(oam::FAILED);
}
else
{
//now stop local module
processManager.stopModule(config.moduleName(), graceful, manualFlag );
//run save.brm script
processManager.saveBRM(false);
log.writeLog(__LINE__, "Stop System Completed Success", LOG_TYPE_INFO);
processManager.setSystemState(oam::MAN_OFFLINE);
//clearout auto move dbroots files
string cmd = "rm -f " + startup::StartUp::installDir() + "/local/moveDbrootTransactionLog";
system(cmd.c_str());
cmd = "touch " + startup::StartUp::installDir() + "/local/moveDbrootTransactionLog";
system(cmd.c_str());
}
}
if (ackIndicator)
{
ackMsg.reset();
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) API_SUCCESS;
fIos.write(ackMsg);
log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender");
}
startsystemthreadStop = false;
break;
}
case SHUTDOWNSYSTEM:
{
    // Shut down the system: every enabled remote module first, then (after the
    // ACK and the standby-config cleanup) the local module. When HDFS is set,
    // the other hosts are stopped through a pdsh command instead and the case
    // ends without calling shutdownModule on the local module.
    log.writeLog(__LINE__, "MSG RECEIVED: Shutdown System request..." );

    // GRACEFUL_WAIT means that we are shutting down, but waiting for
    // all transactions to finish or rollback as commanded. This is only set if
    // there are, in fact, transactions active (or cpimport).

    // Result of the most recent remote shutdownModule call (used for logging).
    int retStatus = oam::API_SUCCESS;

    if (HDFS)
    {
        if (ackIndicator)
        {
            // Send API_SUCCESS explicitly, as the log line below states, rather
            // than the outer 'status' variable, which this case never assigns.
            ackMsg.reset();
            ackMsg << (ByteStream::byte) oam::ACK;
            ackMsg << actionType;
            ackMsg << target;
            ackMsg << (ByteStream::byte) API_SUCCESS;
            try {
                fIos.write(ackMsg);
            }
            catch(...) {}
            log.writeLog(__LINE__, "SHUTDOWNSYSTEM: ACK back to sender, return status = " + oam.itoa(API_SUCCESS));
        }

        Config* sysConfig = Config::makeConfig();

        // clear Standby OAM Module
        sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
        sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);

        //update Columnstore Config table
        try {
            sysConfig->write();
        }
        catch(...)
        {
            log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
        }

        // Run "columnstore stop" through pdsh, excluding localHostName.
        string cmd = "pdsh -a -x " + localHostName + " '" + startup::StartUp::installDir() + "/columnstore stop' > /dev/null 2>&1";
        system(cmd.c_str());
        break;
    }
    else
    {
        for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
            if( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                //do local module last
                if ( (*pt).DeviceName == config.moduleName() )
                {
                    continue;
                }

                // If the status lookup throws, opState keeps its ACTIVE default
                // and the module is still shut down.
                int opState = oam::ACTIVE;
                bool degraded;
                try {
                    oam.getModuleStatus((*pt).DeviceName, opState, degraded);
                }
                catch(...)
                {
                }

                if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                    continue;

                retStatus = processManager.shutdownModule((*pt).DeviceName, graceful, manualFlag, 0);
                // Record failures instead of silently discarding the result.
                if (retStatus != oam::API_SUCCESS)
                    log.writeLog(__LINE__, "SHUTDOWNSYSTEM: shutdownModule on " + (*pt).DeviceName + " returned status = " + oam.itoa(retStatus));
            }
        }
    }

    if (ackIndicator)
    {
        // Same as the HDFS path: the ACK reports API_SUCCESS, matching the log
        // line, and is sent before the local module is shut down.
        ackMsg.reset();
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << target;
        ackMsg << (ByteStream::byte) API_SUCCESS;
        try {
            fIos.write(ackMsg);
        }
        catch(...) {}
        log.writeLog(__LINE__, "SHUTDOWNSYSTEM: ACK back to sender, return status = " + oam.itoa(API_SUCCESS));
    }

    Config* sysConfig = Config::makeConfig();

    // clear Standby OAM Module
    sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
    sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);

    //update Columnstore Config table
    try {
        sysConfig->write();
    }
    catch(...)
    {
        log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
    }

    //clearout auto move dbroots files (remove, then recreate empty)
    string cmd = "rm -f " + startup::StartUp::installDir() + "/local/moveDbrootTransactionLog";
    system(cmd.c_str());
    cmd = "touch " + startup::StartUp::installDir() + "/local/moveDbrootTransactionLog";
    system(cmd.c_str());

    // now do local module
    processManager.shutdownModule(config.moduleName(), graceful, manualFlag);
    break;
}
case STARTSYSTEM:
{
    // Start the system by running startSystemThread. When the sender asked for
    // an ACK, wait for the thread to finish and report its result.
    log.writeLog(__LINE__, "MSG RECEIVED: Start System request...ackIndicator=" + oam.itoa(ackIndicator));
    startsystemthreadStop = false;

    // get system status and don't process if already in-progress
    try {
        SystemStatus systemstatus;
        oam.getSystemStatus(systemstatus);

        if (systemstatus.SystemOpState == MAN_INIT) {
            log.writeLog(__LINE__, "STARTSYSTEM: Start already in-progess");
            if (ackIndicator)
            {
                ackMsg << (ByteStream::byte) oam::ACK;
                ackMsg << actionType;
                ackMsg << target;
                ackMsg << (ByteStream::byte) API_ALREADY_IN_PROGRESS;
                try {
                    fIos.write(ackMsg);
                }
                catch(...) {}
                log.writeLog(__LINE__, "STARTSYSTEM: ACK back to sender");
            }
            break;
        }
    }
    catch(...)
    {
        // A failed status lookup is ignored; the start request proceeds.
    }

    // NOTE(review): the thread receives the address of this case-local list and
    // is only joined when ackIndicator is set - confirm startSystemThread does
    // not use it after this case has returned.
    oam::DeviceNetworkList devicenetworklist;
    pthread_t startsystemthread;
    status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist);

    const bool threadStarted = (status == 0);
    if ( !threadStarted ) {
        log.writeLog(__LINE__, "STARTSYSTEMS: pthread_create failed, return status = " + oam.itoa(status));
        status = API_FAILURE;
    }

    if (ackIndicator)
    {
        if (threadStarted)
        {
            // Wait for the start to finish and pick up the status published by
            // the start thread (not the stop-thread status).
            pthread_join(startsystemthread, NULL);
            status = startsystemthreadStatus;
        }

        // The ACK is also sent when the thread could not be created, so the
        // sender is told about the failure instead of waiting for a reply.
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << target;
        ackMsg << (ByteStream::byte) status;
        try {
            fIos.write(ackMsg);
        }
        catch(...) {}
        log.writeLog(__LINE__, "STARTSYSTEM: ACK back to sender");
    }

    log.writeLog(__LINE__, "STARTSYSTEM: Start System Request Completed with status = " + oam.itoa(status));
    break;
}
case RESTARTSYSTEM:
{
    // Restart the system: optionally wait for transactions to drain, stop all
    // enabled modules (local module last), save the BRM, then launch
    // startSystemThread and ACK the sender.
    log.writeLog(__LINE__, "MSG RECEIVED: Restart System request..." );
    startsystemthreadStop = false;

    // GRACEFUL_WAIT means that we are shutting down, but waiting for
    // all transactions to finish or rollback as commanded. This is only set if
    // there are, in fact, transactions active (or cpimport).
    if (graceful == GRACEFUL_WAIT)
    {
        ByteStream stillWorkingMsg;
        stillWorkingMsg << (ByteStream::byte) oam::ACK;
        stillWorkingMsg << actionType;
        stillWorkingMsg << target;
        stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING;

        // This wait can take a while. We wait for table locks to release and open transactions to commit.
        if (oam.waitForSystem(RESTARTSYSTEM, fIos, stillWorkingMsg))
        {
            graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT.
            // Send an ack back to say we're done waiting and are now shutting down.
            ackMsg << (ByteStream::byte) oam::ACK;
            ackMsg << actionType;
            ackMsg << target;
            ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE;
            try {
                fIos.write(ackMsg);
            }
            catch(...) {}
            log.writeLog(__LINE__, "RESTARTSYSTEM: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE));
        }
        else
        {
            // We've been cancelled.
            if (ackIndicator)
            {
                ackMsg << (ByteStream::byte) oam::ACK;
                ackMsg << actionType;
                ackMsg << target;
                ackMsg << (ByteStream::byte) API_CANCELLED;
                try {
                    fIos.write(ackMsg);
                }
                catch(...) {}
                log.writeLog(__LINE__, "RESTARTSYSTEM: ACK back to sender (canceled)");
            }
            // A cancelled request must never go on to restart the system,
            // whether or not the sender wanted an ACK.
            break;
        }
    }

    //set the flag to have any startsystemthreads to exit out before stop is done
    startsystemthreadStop = true;
    if ( startsystemthreadRunning )
        sleep(5);

    //get system status
    SystemStatus systemstatus;
    try {
        oam.getSystemStatus(systemstatus);
    }
    catch(...)
    {}

    //set system status
    processManager.setSystemState(oam::MAN_OFFLINE);

    //call to update module status and send notification message
    //stop all of processes..
    for( unsigned int i = 0 ;i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
        if( moduleCount == 0)
            continue;

        DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
        for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
        {
            // If the status lookup throws, opState keeps its ACTIVE default.
            int opState = oam::ACTIVE;
            bool degraded;
            try {
                oam.getModuleStatus((*pt).DeviceName, opState, degraded);
            }
            catch(...)
            {
            }

            if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                continue;

            processManager.stopModule((*pt).DeviceName, STATUS_UPDATE, manualFlag);
        }
    }

    //stop by process type first, if system is ACTIVE
    if (systemstatus.SystemOpState == ACTIVE)
        processManager.stopProcessTypes(manualFlag);

    status = API_SUCCESS;

    // stop modules
    for( unsigned int i = 0 ;i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
        if( moduleCount == 0)
            continue;

        DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
        for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
        {
            //skip OAM Parent module, do at the end
            if ( (*pt).DeviceName == config.moduleName() )
                continue;

            int opState = oam::ACTIVE;
            bool degraded;
            try {
                oam.getModuleStatus((*pt).DeviceName, opState, degraded);
            }
            catch(...)
            {
            }

            if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                continue;

            log.writeLog(__LINE__, "RESTARTSYSTEM: Request Stop Module on " + (*pt).DeviceName );
            int retStatus = processManager.stopModule((*pt).DeviceName, graceful, manualFlag);
            // Log the status of this stopModule call, not the running aggregate.
            log.writeLog(__LINE__, "RESTARTSYSTEM: ACK received from Process-Monitor, return status = " + oam.itoa(retStatus));

            // Remember any failure; the start below only runs if all stops succeeded.
            if (retStatus != API_SUCCESS)
                status = retStatus;
        }
    }

    //now stop local module
    processManager.stopModule(config.moduleName(), graceful, manualFlag );

    //run save.brm script
    processManager.saveBRM(false);

    log.writeLog(__LINE__, "RESTARTSYSTEM: ACK received from Process-Monitor for stopModule requests, return status = " + oam.itoa(status));

    startsystemthreadStop = false;

    if (status == API_SUCCESS ) {
        //distribute config file
        processManager.distributeConfigFile("system");

        // NOTE(review): the thread receives the address of this block-local
        // list and is never joined here - confirm startSystemThread does not
        // use it after this block has ended.
        oam::DeviceNetworkList devicenetworklist;
        pthread_t startsystemthread;
        // Capture the result so the failure check below is meaningful.
        status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist);

        if ( status != 0 ) {
            log.writeLog(__LINE__, "RESTARTSYSTEM: pthread_create failed, return status = " + oam.itoa(status));
            status = API_FAILURE;
        }

        // BUG 4554 We don't need the join because calpont console is now looking for "Active"
        // We need to return the ack right away to let console know we got the message.

        log.writeLog(__LINE__, "RESTARTSYSTEM: Start System Request Completed", LOG_TYPE_INFO);
    }

    if (ackIndicator)
    {
        // reset() drops any earlier "transactions complete" ACK still in ackMsg.
        ackMsg.reset();
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << target;
        ackMsg << (ByteStream::byte) status;
        try {
            fIos.write(ackMsg);
        }
        catch(...) {}
        log.writeLog(__LINE__, "RESTARTSYSTEM: ACK back to sender");
    }

    log.writeLog(__LINE__, "Restart System Completed, status = " + oam.itoa(status), LOG_TYPE_INFO);
    break;
}
case STOPPROCESS:
{
    // Stop the process named by 'target' on the module named in the message,
    // then ACK the result if the sender asked for one.
    log.writeLog(__LINE__, "MSG RECEIVED: Stop Process request on " + target );

    string hostModule;
    msg >> hostModule;

    status = API_SUCCESS;
    status = processManager.stopProcess(hostModule, target, graceful, manualFlag);

    log.writeLog(__LINE__, "STOPPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status));
    log.writeLog(__LINE__, "Stop Process Completed on " + hostModule + " / " + target, LOG_TYPE_INFO );

    if (!ackIndicator)
        break;

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    log.writeLog(__LINE__, "STOPPROCESS: ACK back to sender");
    break;
}
case STARTPROCESS:
{
    // Start the process named by 'target' on the module named in the message.
    log.writeLog(__LINE__, "MSG RECEIVED: Start Process request on " + target);

    string hostModule;
    msg >> hostModule;

    status = processManager.startProcess(hostModule, target, graceful);

    log.writeLog(__LINE__, "STARTPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status));
    log.writeLog(__LINE__, "Start Process Completed on " + hostModule + " / " + target, LOG_TYPE_INFO );

    // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) and DDL/DMLProc
#if 0 // A RESTARTPROCESS message is about to arrive, so this is redundant.
    if( target.find("PrimProc") == 0) {
        //distribute config file
        processManager.distributeConfigFile("system");
        processManager.reinitProcessType("WriteEngineServer");
        processManager.restartProcessType("ExeMgr");
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }
    // if a WriteEngineServer was restarted, restart DDL/DMLProc
    if( target.find("WriteEngineServer") == 0) {
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }
#endif

    // if DDL or DMLProc, change IP Address
    const bool isDdlProc = (target.find("DDLProc") == 0);
    if (isDdlProc || target.find("DMLProc") == 0)
    {
        processManager.setPMProcIPs(hostModule, target);
    }

    if (!ackIndicator)
        break;

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    log.writeLog(__LINE__, "STARTPROCESS: ACK back to sender");
    break;
}
case RESTARTPROCESS:
{
    // Restart the process named by 'target' on the module named in the
    // message, then refresh the process types that depend on it.
    log.writeLog(__LINE__, "MSG RECEIVED: Restart Process request on " + target );

    string hostModule;
    msg >> hostModule;

    status = processManager.restartProcess(hostModule, target, graceful, manualFlag);

    // if a PrimProc was restarted, restart ACTIVE ExeMgr(s)
    if (target.find("PrimProc") == 0)
    {
        //distribute config file
        processManager.distributeConfigFile("system");

        processManager.reinitProcessType("WriteEngineServer");
        processManager.restartProcessType("ExeMgr");
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }

    // if a WriteEngineServer was restarted, restart DDL/DMLProc
    if (target.find("WriteEngineServer") == 0)
    {
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }

    log.writeLog(__LINE__, "RESTARTPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status));
    log.writeLog(__LINE__, "Restart Process Completed on " + hostModule + " / " + target, LOG_TYPE_INFO );

    if (!ackIndicator)
        break;

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    log.writeLog(__LINE__, "RESTARTPROCESS: ACK back to sender");
    break;
}
case UPDATELOG:
{
    // Apply a logging change (action + level from the message) either to every
    // configured module ("system") or to the single module named by 'target'.
    // The ACK is always sent.
    string logAction;
    string logLevel;
    msg >> logAction;
    msg >> logLevel;

    log.writeLog(__LINE__, "MSG RECEIVED: " + logAction + " logging on " + target + " for level " + logLevel );

    status = API_SUCCESS;

    if (target != "system")
    {
        // for a specific module: validate module name
        bool matched = false;

        for (unsigned int t = 0; t < systemmoduletypeconfig.moduletypeconfig.size(); ++t)
        {
            if (systemmoduletypeconfig.moduletypeconfig[t].ModuleCount == 0)
                continue;

            DeviceNetworkList& mods = systemmoduletypeconfig.moduletypeconfig[t].ModuleNetworkList;
            for (DeviceNetworkList::iterator m = mods.begin(); m != mods.end(); ++m)
            {
                if (m->DeviceName != target)
                    continue;

                status = processManager.updateLog(logAction, target, logLevel);
                matched = true;
                break;  // leaves only this module type's list
            }
        }

        if (!matched)
        {
            log.writeLog(__LINE__, "ERROR: Invalid module name: " + target, LOG_TYPE_ERROR);
            status = API_INVALID_PARAMETER;
        }
    }
    else
    {
        // send logging message to all modules; any failure is kept in status
        for (unsigned int t = 0; t < systemmoduletypeconfig.moduletypeconfig.size(); ++t)
        {
            if (systemmoduletypeconfig.moduletypeconfig[t].ModuleCount == 0)
                continue;

            DeviceNetworkList& mods = systemmoduletypeconfig.moduletypeconfig[t].ModuleNetworkList;
            for (DeviceNetworkList::iterator m = mods.begin(); m != mods.end(); ++m)
            {
                const int rc = processManager.updateLog(logAction, m->DeviceName, logLevel);
                if (rc != API_SUCCESS)
                    status = rc;
            }
        }
    }

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }

    log.writeLog(__LINE__, "UPDATELOG: ACK back to sender, return status = " + oam.itoa(status));
    break;
}
case GETCONFIGLOG:
{
    // Request the log configuration of the module named by 'target'; the
    // module name must match a configured module. The ACK is always sent.
    log.writeLog(__LINE__, "MSG RECEIVED: Get Log Configuation" );

    status = API_SUCCESS;

    // validate module name and make request
    bool matched = false;

    for (unsigned int t = 0; t < systemmoduletypeconfig.moduletypeconfig.size(); ++t)
    {
        if (systemmoduletypeconfig.moduletypeconfig[t].ModuleCount == 0)
            continue;

        DeviceNetworkList& mods = systemmoduletypeconfig.moduletypeconfig[t].ModuleNetworkList;
        for (DeviceNetworkList::iterator m = mods.begin(); m != mods.end(); ++m)
        {
            if (m->DeviceName != target)
                continue;

            status = processManager.getConfigLog(target);
            matched = true;
            break;  // leaves only this module type's list
        }
    }

    if (!matched)
    {
        log.writeLog(__LINE__, "ERROR: Invalid module name: " + target, LOG_TYPE_ERROR);
        status = API_INVALID_PARAMETER;
    }

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }

    log.writeLog(__LINE__, "GETCONFIGLOG: ACK back to sender, return status = " + oam.itoa(status));
    break;
}
case REINITPROCESS:
{
    // Push the config file to the module named in the message, then
    // re-initialise the process named by 'target' on it.
    log.writeLog(__LINE__, "MSG RECEIVED: Re-Init Process request..." );

    string hostModule;
    msg >> hostModule;

    //distribute config file
    processManager.distributeConfigFile(hostModule);

    status = processManager.reinitProcess(hostModule, target);
    log.writeLog(__LINE__, "REINITPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status));

    if (!ackIndicator)
        break;

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    log.writeLog(__LINE__, "REINITPROCESS: ACK back to sender");
    break;
}
case UPDATECONFIG:
{
    // Distribute ProcessConfig.xml to the system and ask every configured
    // module to reload it. 'status' ends up API_SUCCESS unless some
    // updateConfig call failed, in which case it holds the last failure.
    log.writeLog(__LINE__, "MSG RECEIVED: Update Process Configuation" );

    status = API_SUCCESS;

    //distribute update of process config file
    processManager.distributeConfigFile("system", "ProcessConfig.xml");

    for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
        if( moduleCount == 0)
            continue;

        DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
        for (;pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
        {
            int retStatus = processManager.updateConfig((*pt).DeviceName);
            if (retStatus != API_SUCCESS)
                status = retStatus;
        }
    }

    // Actually send the ACK that the log line below reports, using the same
    // layout as the other cases that log "ACK back to sender, return status".
    // NOTE(review): confirm the UPDATECONFIG sender reads this reply.
    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try {
        fIos.write(ackMsg);
    }
    catch(...) {}

    log.writeLog(__LINE__, "UPDATECONFIG: ACK back to sender, return status = " + oam.itoa(status));
    break;
}
case BUILDSYSTEMTABLES:
{
    // Forward a build-system-tables request to 'target' and optionally ACK.
    log.writeLog(__LINE__, "MSG RECEIVED: Send Build System Table request to " + target);

    status = processManager.buildSystemTables(target);
    log.writeLog(__LINE__, "BUILDSYSTEMTABLES: ACK received from Process-Monitor, return status = " + oam.itoa(status));

    if (!ackIndicator)
        break;

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    log.writeLog(__LINE__, "BUILDSYSTEMTABLES: ACK back to sender");
    break;
}
case ADDMODULE:
{
    // Decode a list of module descriptions (name, temp name, disable state and
    // per-NIC host entries) plus a password from the message and pass them to
    // addModule. The ACK for this action carries only the action type and the
    // status (no target).
    log.writeLog(__LINE__, "MSG RECEIVED: Add Module request");

    string field;
    uint16_t count, nicId, nicCount;
    oam::DeviceNetworkConfig moduleEntry;
    oam::DeviceNetworkList moduleList;
    oam::HostConfig hostEntry;

    //get module count to add
    msg >> count;

    if (count == 0)
    {
        status = oam::API_INVALID_PARAMETER;
        log.writeLog(__LINE__, "ADDMODULE: Module Count invalid = " + oam.itoa(count));
    }
    else
    {
        for (int m = 0; m < count; ++m)
        {
            msg >> field;
            moduleEntry.DeviceName = field;
            msg >> field;
            moduleEntry.UserTempDeviceName = field;
            msg >> field;
            moduleEntry.DisableState = field;

            msg >> nicCount;
            for (int n = 0; n < nicCount; ++n)
            {
                msg >> field;
                hostEntry.IPAddr = field;
                msg >> field;
                hostEntry.HostName = field;
                msg >> nicId;
                hostEntry.NicID = nicId;
                moduleEntry.hostConfigList.push_back(hostEntry);
            }

            moduleList.push_back(moduleEntry);
            // start the next module with an empty host list
            moduleEntry.hostConfigList.clear();
        }

        string password;
        msg >> password;

        status = processManager.addModule(moduleList, password);
        log.writeLog(__LINE__, "ADDMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
    }

    if (ackIndicator)
    {
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << status;
        try
        {
            fIos.write(ackMsg);
        }
        catch (...)
        {
            // write failures are deliberately ignored
        }
        log.writeLog(__LINE__, "ADDMODULE: ACK back to sender");
    }
    break;
}
case REMOVEMODULE:
{
    // Decode the list of modules to remove and pass it to removeModule. The
    // ACK for this action carries only the action type and the status.
    log.writeLog(__LINE__, "MSG RECEIVED: Remove Module request");

    uint16_t count, hostConfigCount;
    string field;
    oam::DeviceNetworkConfig moduleEntry;
    oam::DeviceNetworkList moduleList;

    //get module count to remove
    msg >> count;

    if (count == 0)
    {
        status = oam::API_INVALID_PARAMETER;
        log.writeLog(__LINE__, "REMOVEMODULE: Module Count invalid = " + oam.itoa(count));
    }
    else
    {
        for (int m = 0; m < count; ++m)
        {
            msg >> field;
            moduleEntry.DeviceName = field;
            msg >> field;
            moduleEntry.UserTempDeviceName = field;
            msg >> field;
            moduleEntry.DisableState = field;
            moduleList.push_back(moduleEntry);

            // The host-config count is consumed but no host entries are read.
            // NOTE(review): presumably the sender always writes 0 here - confirm.
            msg >> hostConfigCount;
        }

        // The password is read from the message but not passed on.
        string password;
        msg >> password;

        status = processManager.removeModule(moduleList);
        log.writeLog(__LINE__, "REMOVEMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
        log.writeLog(__LINE__, "Remove Module Completed", LOG_TYPE_INFO);
    }

    if (ackIndicator)
    {
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << status;
        try
        {
            fIos.write(ackMsg);
        }
        catch (...)
        {
            // write failures are deliberately ignored
        }
        log.writeLog(__LINE__, "REMOVEMODULE: ACK back to sender");
    }
    break;
}
case RECONFIGUREMODULE:
{
    // Decode a list of module descriptions (same layout as ADDMODULE) and pass
    // it to reconfigureModule. The ACK for this action carries only the action
    // type and the status.
    log.writeLog(__LINE__, "MSG RECEIVED: Reconfigure Module request");

    string field;
    uint16_t count, nicId, nicCount;
    oam::DeviceNetworkConfig moduleEntry;
    oam::DeviceNetworkList moduleList;
    oam::HostConfig hostEntry;

    //get module count
    msg >> count;

    if (count == 0)
    {
        status = oam::API_INVALID_PARAMETER;
        log.writeLog(__LINE__, "RECONFIGUREMODULE: Module Count invalid = " + oam.itoa(count));
    }
    else
    {
        for (int m = 0; m < count; ++m)
        {
            msg >> field;
            moduleEntry.DeviceName = field;
            msg >> field;
            moduleEntry.UserTempDeviceName = field;
            msg >> field;
            moduleEntry.DisableState = field;

            msg >> nicCount;
            for (int n = 0; n < nicCount; ++n)
            {
                msg >> field;
                hostEntry.IPAddr = field;
                msg >> field;
                hostEntry.HostName = field;
                msg >> nicId;
                hostEntry.NicID = nicId;
                moduleEntry.hostConfigList.push_back(hostEntry);
            }

            moduleList.push_back(moduleEntry);
            // start the next module with an empty host list
            moduleEntry.hostConfigList.clear();
        }

        // The password is read from the message but not passed on.
        string password;
        msg >> password;

        status = processManager.reconfigureModule(moduleList);
        log.writeLog(__LINE__, "RECONFIGUREMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status));
    }

    if (ackIndicator)
    {
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << status;
        try
        {
            fIos.write(ackMsg);
        }
        catch (...)
        {
            // write failures are deliberately ignored
        }
        log.writeLog(__LINE__, "RECONFIGUREMODULE: ACK back to sender");
    }
    break;
}
case STOPPROCESSTYPE:
{
    // Stop every process of the given type; "DBRM" expands to the controller
    // and worker node types. No ACK is sent for this action.
    log.writeLog(__LINE__, "MSG RECEIVED: Stop Process Type request: " + target);

    if (target != "DBRM")
    {
        processManager.stopProcessType(target);
    }
    else
    {
        processManager.stopProcessType("DBRMControllerNode");
        processManager.stopProcessType("DBRMWorkerNode");
    }

    log.writeLog(__LINE__, "Stop Process Type Completed", LOG_TYPE_INFO );
    break;
}
case STARTPROCESSTYPE:
{
    // Start every process of the given type; "DBRM" expands to the controller
    // and worker node types. Dependent process types are refreshed afterwards.
    // No ACK is sent for this action.
    log.writeLog(__LINE__, "MSG RECEIVED: Start Process Type request: " + target);

    if (target != "DBRM")
    {
        processManager.startProcessType(target);
    }
    else
    {
        processManager.startProcessType("DBRMControllerNode");
        processManager.startProcessType("DBRMWorkerNode");
    }

    // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) and DDL/DMLProc
    if (target == "PrimProc")
    {
        //distribute config file
        processManager.distributeConfigFile("system");

        processManager.reinitProcessType("WriteEngineServer");
        processManager.restartProcessType("ExeMgr");
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }

    // if a WriteEngineServer was restarted, restart DDL/DMLProc
    if (target.find("WriteEngineServer") == 0)
    {
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }

    log.writeLog(__LINE__, "Start Process Type Completed", LOG_TYPE_INFO );
    break;
}
case RESTARTPROCESSTYPE:
{
    // Restart every process of the given type; "DBRM" expands to the
    // controller and worker node types. Dependent process types are refreshed
    // afterwards. No ACK is sent for this action.
    log.writeLog(__LINE__, "MSG RECEIVED: Restart Process Type request: " + target);

    if (target != "DBRM")
    {
        processManager.restartProcessType(target);

        // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) and DDL/DMLProc
        if (target == "PrimProc")
        {
            //distribute config file
            processManager.distributeConfigFile("system");

            processManager.reinitProcessType("WriteEngineServer");
            processManager.restartProcessType("ExeMgr");
            processManager.reinitProcessType("DDLProc");
            processManager.reinitProcessType("DMLProc");
        }
    }
    else
    {
        processManager.restartProcessType("DBRMControllerNode");
        processManager.restartProcessType("DBRMWorkerNode");
    }

    // if a WriteEngineServer was restarted, restart DDL/DMLProc
    if (target.find("WriteEngineServer") == 0)
    {
        processManager.reinitProcessType("DDLProc");
        processManager.reinitProcessType("DMLProc");
    }

    log.writeLog(__LINE__, "Restart Process Type Completed", LOG_TYPE_INFO );
    break;
}
case REINITPROCESSTYPE:
{
    // Re-initialise every process of the type named by 'target' and
    // optionally ACK the result.
    log.writeLog(__LINE__, "MSG RECEIVED: Reinit Process Type request: " + target);

    status = processManager.reinitProcessType(target);

    if (ackIndicator)
    {
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << target;
        ackMsg << (ByteStream::byte) status;
        try
        {
            fIos.write(ackMsg);
        }
        catch (...)
        {
            // write failures are deliberately ignored
        }
    }

    log.writeLog(__LINE__, "Reinit Process Type Completed, return status = " + oam.itoa(status));
    break;
}
case DISTRIBUTECONFIG:
{
    // Distribute the config file named in the message to 'target'. The
    // optional ACK always reports API_SUCCESS.
    string fileName;
    msg >> fileName;

    log.writeLog(__LINE__, "MSG RECEIVED: Distribute Config File " + target + "/" + fileName);

    processManager.distributeConfigFile(target, fileName);

    if (ackIndicator)
    {
        ackMsg << (ByteStream::byte) oam::ACK;
        ackMsg << actionType;
        ackMsg << target;
        ackMsg << (ByteStream::byte) oam::API_SUCCESS;
        try
        {
            fIos.write(ackMsg);
        }
        catch (...)
        {
            // write failures are deliberately ignored
        }
    }

    log.writeLog(__LINE__, "Distribute Config File Completed " + target + "/" + fileName);
    break;
}
case SWITCHOAMPARENT:
{
    // Switch the OAM parent role to the module named by 'target', ACK the
    // result, then stop this ProcessManager.
    log.writeLog(__LINE__, "MSG RECEIVED: Switch OAM Parent to : " + target);

    // GRACEFUL_WAIT means that we are shutting down, but waiting for
    // all transactions to finish or rollback as commanded. This is only set if
    // there are, in fact, transactions active (or cpimport).
    if (graceful == GRACEFUL_WAIT)
    {
        ByteStream stillWorkingMsg;
        stillWorkingMsg << (ByteStream::byte) oam::ACK;
        stillWorkingMsg << actionType;
        stillWorkingMsg << target;
        stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING;

        // This wait can take a while. We wait for table locks to release and open transactions to commit.
        // NOTE(review): RESTARTSYSTEM (not SWITCHOAMPARENT) is passed as the
        // action here - confirm that is what waitForSystem expects.
        if (oam.waitForSystem(RESTARTSYSTEM, fIos, stillWorkingMsg))
        {
            graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT.
            // Send an ack back to say we're done waiting and are now shutting down.
            ackMsg << (ByteStream::byte) oam::ACK;
            ackMsg << actionType;
            ackMsg << target;
            ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE;
            try {
                fIos.write(ackMsg);
            }
            catch(...) {}
            log.writeLog(__LINE__, "SWITCHOAMPARENT: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE));
        }
        else
        {
            // We've been cancelled.
            if (ackIndicator)
            {
                ackMsg << (ByteStream::byte) oam::ACK;
                ackMsg << actionType;
                ackMsg << target;
                ackMsg << (ByteStream::byte) API_CANCELLED;
                try {
                    fIos.write(ackMsg);
                }
                catch(...) {}
                log.writeLog(__LINE__, "SWITCHOAMPARENT: ACK back to sender (canceled)");
            }
            // A cancelled request must never go on to switch the parent,
            // whether or not the sender wanted an ACK.
            break;
        }
    }

    status = processManager.switchParentOAMModule(target);
    log.writeLog(__LINE__, "Switch OAM Parent Completed", LOG_TYPE_INFO );

    // Start from an empty buffer so the final ACK is not appended to the
    // "transactions complete" ACK built above (RESTARTSYSTEM and SUSPENDWRITES
    // reset at the same point).
    ackMsg.reset();
    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try {
        fIos.write(ackMsg);
    }
    catch(...) {}

    // stop myself
    processManager.stopProcess(config.moduleName(), "ProcessManager", oam::FORCEFUL, true);
    break;
}
case UNMOUNT:
{
    // Unmount the dbroot named by 'target'; the ACK is always sent.
    log.writeLog(__LINE__, "MSG RECEIVED: Unmount dbroot : " + target);

    status = processManager.unmountDBRoot(target);
    log.writeLog(__LINE__, "UnMount Completed status: " + oam.itoa(status) );

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    break;
}
case MOUNT:
{
    // Mount the dbroot named by 'target'; the ACK is always sent.
    log.writeLog(__LINE__, "MSG RECEIVED: mount dbroot : " + target);

    status = processManager.mountDBRoot(target);
    log.writeLog(__LINE__, "Mount Completed status: " + oam.itoa(status) );

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    break;
}
case SUSPENDWRITES:
{
    // Suspend database writes: optionally wait for transactions to drain, mark
    // the system suspended in the DBRM, run save_brm, and ACK the outcome. If
    // save_brm fails the suspension is rolled back.
    ByteStream::byte ackResponse = API_FAILURE;
    log.writeLog(__LINE__, "MSG RECEIVED: suspend database writes");

    // GRACEFUL_WAIT means that we are Suspending writes, but waiting for all
    // transactions to finish or rollback as commanded. This is only set if there
    // are, in fact, transactions active (or cpimport).
    if (graceful == GRACEFUL_WAIT)
    {
        ByteStream stillWorkingMsg;
        stillWorkingMsg << (ByteStream::byte) oam::ACK;
        stillWorkingMsg << actionType;
        stillWorkingMsg << target;
        stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING;

        // This wait can take a while. We wait for table locks to release and open transactions to commit.
        if (oam.waitForSystem(SUSPENDWRITES, fIos, stillWorkingMsg))
        {
            graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT.
            // Send an ack back to say we're done waiting and are now shutting down.
            ackMsg << (ByteStream::byte) oam::ACK;
            ackMsg << actionType;
            ackMsg << target;
            ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE;
            try {
                fIos.write(ackMsg);
            }
            catch(...) {}
            log.writeLog(__LINE__, "SUSPENDWRITES: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE));
        }
        else
        {
            // We've been cancelled.
            if (ackIndicator)
            {
                ackMsg << (ByteStream::byte) oam::ACK;
                ackMsg << actionType;
                ackMsg << target;
                ackMsg << (ByteStream::byte) API_CANCELLED;
                try {
                    fIos.write(ackMsg);
                }
                catch(...) {}
                log.writeLog(__LINE__, "SUSPENDWRITES: ACK back to sender (canceled)");
            }
            // A cancelled request must never go on to suspend writes,
            // whether or not the sender wanted an ACK.
            break;
        }
    }

    BRM::DBRM dbrm;
    dbrm.setSystemSuspended(true);

    // Wait for everything to settle down
    sleep(5);

    // Save the BRM. This command presages a system backup. Best to have a current BRM on disk
    string logdir("/var/log/mariadb/columnstore");
    if (access(logdir.c_str(), W_OK) != 0) logdir = "/tmp";
    string cmd = startup::StartUp::installDir() + "/bin/save_brm > " + logdir + "/save_brm.log1 2>&1";
    int rtnCode = system(cmd.c_str());

    // Success only if the command could be run, exited normally, and returned
    // 0. WEXITSTATUS alone would also read 0 for a command killed by a signal.
    if (rtnCode != -1 && WIFEXITED(rtnCode) && WEXITSTATUS(rtnCode) == 0)
    {
        ackResponse = API_SUCCESS;
    }
    else
    {
        ackResponse = API_FAILURE_DB_ERROR;
        dbrm.setSystemSuspended(false);
    }

    // reset() drops any earlier "transactions complete" ACK still in ackMsg.
    ackMsg.reset();
    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << ackResponse;
    try {
        fIos.write(ackMsg);
    }
    catch(...) {}

    log.writeLog(__LINE__, "SUSPENDWRITES: ACK back to sender, return status = " + oam.itoa(ackResponse));
    break;
}
case FSTABUPDATE:
{
    // Send an fstab entry either to every "pm" module except the local one
    // (target "system") or to the single module named by 'target'. The ACK is
    // always sent.
    log.writeLog(__LINE__, "MSG RECEIVED: Distribute Fstab update" );

    //get fstab entry
    string fstabEntry;
    msg >> fstabEntry;

    status = API_SUCCESS;

    if (target != "system")
    {
        const int rc = processManager.updateFstab(target, fstabEntry);
        if (rc != API_SUCCESS)
            status = rc;
    }
    else
    {
        //send out to all pms except local module
        for (unsigned int t = 0; t < systemmoduletypeconfig.moduletypeconfig.size(); ++t)
        {
            if (systemmoduletypeconfig.moduletypeconfig[t].ModuleType != "pm")
                continue;

            if (systemmoduletypeconfig.moduletypeconfig[t].ModuleCount == 0)
                continue;

            DeviceNetworkList& mods = systemmoduletypeconfig.moduletypeconfig[t].ModuleNetworkList;
            for (DeviceNetworkList::iterator m = mods.begin(); m != mods.end(); ++m)
            {
                if (m->DeviceName == config.moduleName())
                    continue;

                const int rc = processManager.updateFstab(m->DeviceName, fstabEntry);
                if (rc != API_SUCCESS)
                    status = rc;
            }
        }
    }

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }

    log.writeLog(__LINE__, "FSTABUPDATE: ACK back to sender, return status = " + oam.itoa(status));
    break;
}
case ENABLEMYSQLREP:
{
    // Enable MySQL replication via setMySQLReplication with an empty module
    // list; the ACK is always sent.
    log.writeLog(__LINE__, "MSG RECEIVED: Enable MySQL Replication");

    // target = root password
    oam::DeviceNetworkList noModules;
    status = processManager.setMySQLReplication(noModules, oam::UnassignedName, false, true, target);

    log.writeLog(__LINE__, "Enable MySQL Replication status: " + oam.itoa(status) );

    ackMsg << (ByteStream::byte) oam::ACK;
    ackMsg << actionType;
    ackMsg << target;
    ackMsg << (ByteStream::byte) status;
    try
    {
        fIos.write(ackMsg);
    }
    catch (...)
    {
        // write failures are deliberately ignored
    }
    break;
}
case DISABLEMYSQLREP:
{
log.writeLog(__LINE__, "MSG RECEIVED: Disable MySQL Replication");
// target = root password
oam::DeviceNetworkList devicenetworklist;
status = processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, false, true, target, false);
log.writeLog(__LINE__, "Disable MySQL Replication status: " + oam.itoa(status) );
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) status;
try {
fIos.write(ackMsg);
}
catch(...) {}
break;
}
case GLUSTERASSIGN:
{
string dbroot;
msg >> dbroot;
log.writeLog(__LINE__, "MSG RECEIVED: Gluster Assign DBRoot: " + dbroot);
status = processManager.glusterAssign(target, dbroot);
log.writeLog(__LINE__, "Gluster Assign DBRoot status: " + oam.itoa(status) );
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) status;
try {
fIos.write(ackMsg);
}
catch(...) {}
break;
}
case GLUSTERUNASSIGN:
{
string dbroot;
msg >> dbroot;
log.writeLog(__LINE__, "MSG RECEIVED: Gluster Unassign DBRoot: " + dbroot);
status = processManager.glusterUnassign(target, dbroot);
log.writeLog(__LINE__, "Gluster Unassign DBRoot status: " + oam.itoa(status) );
ackMsg << (ByteStream::byte) oam::ACK;
ackMsg << actionType;
ackMsg << target;
ackMsg << (ByteStream::byte) status;
try {
fIos.write(ackMsg);
}
catch(...) {}
break;
}
/*
case PROCESSALARM:
{
log.writeLog(__LINE__, "MSG RECEIVED: Process Alarm Message");
ByteStream::byte alarmID;
std::string componentID;
ByteStream::byte state;
std::string ModuleName;
std::string processName;
ByteStream::byte pid;
ByteStream::byte tid;
msg >> alarmID;
msg >> componentID;
msg >> state;
msg >> ModuleName;
msg >> processName;
msg >> pid;
msg >> tid;
Alarm calAlarm;
calAlarm.setAlarmID (alarmID);
calAlarm.setComponentID (componentID);
calAlarm.setState (state);
calAlarm.setSname (ModuleName);
calAlarm.setPname (processName);
calAlarm.setPid (pid);
calAlarm.setTid (tid);
ALARMManager aManager;
aManager.processAlarmReport(calAlarm);
break;
}
*/
default:
log.writeLog(__LINE__, "MSG RECEIVED: Invalid type" );
break;
}
break;
case HEARTBEAT_REGISTER:
{
string moduleName;
string processName;
ByteStream::byte id;
msg >> moduleName;
msg >> processName;
msg >> id;
HeartBeatProc hbproc;
hbproc.ModuleName = moduleName;
hbproc.ProcessName = processName;
hbproc.ID = id;
hbproc.receiveFlag = true;
HeartBeatProcList::iterator list = hbproclist.begin();
for( ; list != hbproclist.end() ; list++)
{
if ( (*list).ModuleName == moduleName
&& (*list).ProcessName == processName
&& (*list).ID == id) {
// already in the list
break;
}
}
if ( list == hbproclist.end() ) {
// add to list
hbproclist.push_front(hbproc);
log.writeLog(__LINE__, "Adding Process to Heartbeat Monitor list: " + moduleName + " / " + processName + " / " + oam.itoa(id));
}
}
break;
case HEARTBEAT_DEREGISTER:
{
string moduleName;
string processName;
ByteStream::byte id;
msg >> moduleName;
msg >> processName;
msg >> id;
HeartBeatProcList::iterator list = hbproclist.begin();
for( ; list != hbproclist.end() ; list++)
{
if ( (*list).ModuleName == moduleName
&& (*list).ProcessName == processName
&& (*list).ID == id) {
hbproclist.erase(list);
log.writeLog(__LINE__, "Removing Process from Heartbeat Monitor list: " + moduleName + " / " + processName+ " / " + oam.itoa(id));
break;
}
}
}
break;
case HEARTBEAT_SEND:
{
string moduleName;
string processName;
string timeStamp;
ByteStream::byte id;
ByteStream::byte ackFlag;
msg >> moduleName;
msg >> processName;
msg >> timeStamp;
msg >> id;
msg >> ackFlag;
if ( ackFlag == oam::ACK_YES ) {
// send back an ack msg
ackMsg << (ByteStream::byte) HEARTBEAT_SEND;
try {
fIos.write(ackMsg);
}
catch(...) {}
//log.writeLog(__LINE__, "Heartbeat Ack message sent", LOG_TYPE_DEBUG);
}
HeartBeatProcList::iterator list = hbproclist.begin();
for( ; list != hbproclist.end() ; list++)
{
if ( (*list).ModuleName == moduleName
&& (*list).ProcessName == processName
&& (*list).ID == id) {
(*list).receiveFlag = true;
//log.writeLog(__LINE__, "Heartbeat Received: " + moduleName + " / " + processName + " / " + oam.itoa(id) + ", timestamp: " + timeStamp, LOG_TYPE_DEBUG);
break;
}
}
if ( list == hbproclist.end() ) {
// not found, add to list
HeartBeatProc hbproc;
hbproc.ModuleName = moduleName;
hbproc.ProcessName = processName;
hbproc.ID = id;
hbproc.receiveFlag = true;
hbproclist.push_front(hbproc);
log.writeLog(__LINE__, "Adding Process to Heartbeat Monitor list: " + moduleName + " / " + processName + " / " + oam.itoa(id));
}
}
break;
case PROCESSRESTART:
{
string moduleName;
string processName;
ByteStream::byte manual;
msg >> moduleName;
msg >> processName;
msg >> manual;
log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted on " + moduleName + "/" + processName);
//set query system states not ready
BRM::DBRM dbrm;
dbrm.setSystemQueryReady(false);
processManager.setQuerySystemState(false);
processManager.setSystemState(oam::BUSY_INIT);
processManager.reinitProcessType("cpimport");
//request reinit after Process is active
for ( int i = 0; i < 600 ; i++ ) {
try {
ProcessStatus procstat;
oam.getProcessStatus(processName, moduleName, procstat);
if (procstat.ProcessOpState == oam::ACTIVE) {
// if a PrimProc was restarted, reinit ACTIVE ExeMgr(s) and DDL/DMLProc
if( processName == "PrimProc") {
//distribute config file
processManager.distributeConfigFile("system");
processManager.reinitProcessType("WriteEngineServer");
processManager.reinitProcessType("ExeMgr");
processManager.reinitProcessType("DDLProc");
processManager.reinitProcessType("DMLProc");
}
// if a WriteEngineServer was restarted, restart DDL/DMLProc
if( processName == "WriteEngineServer") {
processManager.reinitProcessType("DDLProc");
processManager.reinitProcessType("DMLProc");
}
// if a ControllerNode was restarted, restart DMLProc
if( processName == "DBRMControllerNode") {
// sleep(5);
// processManager.reinitProcessType("DBRMWorkerNode");
// Wait for DBRMControllerNode to go active
ProcessStatus procstat;
uint16_t state = AUTO_OFFLINE;
while (state == oam::MAN_OFFLINE
|| state == oam::AUTO_OFFLINE
|| state == oam::MAN_INIT
|| state == oam::AUTO_INIT)
{
oam.getProcessStatus("DBRMControllerNode", config.OAMParentName(), procstat);
state = procstat.ProcessOpState;
if ( procstat.ProcessOpState == oam::ACTIVE)
break;
sleep(1);
}
processManager.restartProcessType("DDLProc");
processManager.restartProcessType("DMLProc");
sleep(1);
string DMLmodule = config.OAMParentName();
if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) {
string PrimaryUMModuleName;
try {
oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
}
catch(...) {}
if ( !PrimaryUMModuleName.empty() )
DMLmodule = PrimaryUMModuleName;
}
// Wait for DMLProc to be ACTIVE
BRM::DBRM dbrm;
state = AUTO_OFFLINE;
while (state == oam::MAN_OFFLINE
|| state == oam::AUTO_OFFLINE
|| state == oam::MAN_INIT
|| state == oam::AUTO_INIT
|| state == oam::ROLLBACK_INIT)
{
oam.getProcessStatus("DMLProc", DMLmodule, procstat);
state = procstat.ProcessOpState;
if ( procstat.ProcessOpState == oam::ACTIVE)
break;
sleep(1);
}
dbrm.setSystemQueryReady(true);
}
// if a DDLProc was restarted, reinit DMLProc
if( processName == "DDLProc") {
processManager.reinitProcessType("DMLProc");
}
//only run on auto process restart
if (manual == 0 )
{
//get dbhealth flag
string DBHealthMonitorFlag = "n";
string DBFunctionalMonitorFlag;
try {
oam.getSystemConfig( "DBHealthMonitorFlag", DBHealthMonitorFlag);
}
catch(...) {
DBHealthMonitorFlag = "n";
}
//check the db health
if (DBHealthMonitorFlag == "y" ) {
log.writeLog(__LINE__, "Call the check DB Health API", LOG_TYPE_DEBUG);
try {
oam.checkDBFunctional();
log.writeLog(__LINE__, "check DB Health passed", LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "check DB Health FAILED", LOG_TYPE_ERROR);
}
}
}
break;
}
sleep(1);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
break;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
break;
}
}
//enable query stats
dbrm.setSystemQueryReady(true);
processManager.setQuerySystemState(true);
processManager.setSystemState(oam::ACTIVE);
}
break;
case GETDBRMDATA:
{
log.writeLog(__LINE__, "MSG RECEIVED: Get DBRM Data Files");
string moduleName;
msg >> moduleName;
int ret = processManager.getDBRMData(fIos, moduleName);
if ( ret == oam::API_SUCCESS )
log.writeLog(__LINE__, "Get DBRM Data Files Completed");
else
log.writeLog(__LINE__, "Get DBRM Data Files Failed");
}
break;
case GETALARMDATA:
{
log.writeLog(__LINE__, "MSG RECEIVED: Get Alarm Data Files");
string date;
msg >> date;
processManager.getAlarmData(fIos, GETALARMDATA, date);
log.writeLog(__LINE__, "Get Alarm Data Files Completed");
}
break;
case GETACTIVEALARMDATA:
{
// log.writeLog(__LINE__, "MSG RECEIVED: Get Active Alarm Data Files");
//pull off, but don't need
string date;
msg >> date;
processManager.getAlarmData(fIos, GETACTIVEALARMDATA, "");
// log.writeLog(__LINE__, "Get Active Alarm Data Files Completed");
}
break;
default:
break;
}
// sleep(5);
// requestCount--;
// log.writeLog(__LINE__, "requestCount = " + oam.itoa(requestCount), LOG_TYPE_ERROR);
fIos.close();
pthread_detach (ThreadId);
pthread_exit(0);
}
// Held by getAlarmData() while it looks up alarms and writes the reply.
// Statically initialized so the mutex is in a defined state before its first lock.
pthread_mutex_t ALARM_LOCK = PTHREAD_MUTEX_INITIALIZER;

/******************************************************************************************
* @brief	getAlarmData
*
* purpose:	get Alarm Data and send to requester
*
* @param fIos	socket the reply message is written to
* @param type	GETALARMDATA fetches the alarms for 'date'; any other value fetches the
*		active alarms
* @param date	passed to ALARMManager::getAlarm(); not used for active alarms
*
* @return	oam::API_FAILURE when the alarm lookup throws (a failure reply is still
*		attempted), otherwise oam::API_SUCCESS. Errors while writing the reply are
*		swallowed and do not change the return value.
*
* Reply layout: ACK, type, status and, on success, a one-byte alarm count followed by
* (alarm id, description, severity, timestamp, sname, pname, component id) per alarm.
*
******************************************************************************************/
int ProcessManager::getAlarmData(messageqcpp::IOSocket fIos, int type, std::string date)
{
    ByteStream msg;

    pthread_mutex_lock(&ALARM_LOCK);

    AlarmList alarmList;
    bool fetched = true;

    try {
        ALARMManager sm;

        if ( type == GETALARMDATA )
            sm.getAlarm(date, alarmList);
        else
            sm.getActiveAlarm(alarmList);
    }
    catch(...)
    {
        fetched = false;
    }

    msg << (ByteStream::byte) oam::ACK;
    msg << (ByteStream::byte) type;

    if ( !fetched ) {
        // lookup failed: tell the requester and bail out
        msg << (ByteStream::byte) oam::API_FAILURE;

        try {
            fIos.write(msg);
        }
        catch(...) {}

        pthread_mutex_unlock(&ALARM_LOCK);
        return oam::API_FAILURE;
    }

    msg << (ByteStream::byte) oam::API_SUCCESS;

    // The count travels as a single byte. Casting a larger size would wrap it
    // (e.g. 256 alarms would be announced as 0), so cap both the announced count
    // and the number of entries actually serialized at 255.
    const size_t maxAlarmsPerReply = 255;
    size_t alarmCount = alarmList.size();

    if ( alarmCount > maxAlarmsPerReply )
        alarmCount = maxAlarmsPerReply;

    //number of alarms
    msg << (ByteStream::byte) alarmCount;

    AlarmList::iterator i = alarmList.begin();

    for (size_t sent = 0; sent < alarmCount && i != alarmList.end(); ++sent, ++i)
    {
        msg << (ByteStream::doublebyte) i->second.getAlarmID();
        msg << i->second.getDesc();
        msg << (ByteStream::doublebyte) i->second.getSeverity();
        msg << i->second.getTimestamp();
        msg << i->second.getSname();
        msg << i->second.getPname();
        msg << i->second.getComponentID();
    }

    try {
        fIos.write(msg);
    }
    catch(...) {}

    pthread_mutex_unlock(&ALARM_LOCK);

    return oam::API_SUCCESS;
}
/******************************************************************************************
* @brief	buildRequestMessage
*
* purpose:	Assemble a REQUEST message for a Process Monitor
*
* Fields are appended in this order: REQUEST type, requestID, actionIndicator,
* processName (left out entirely when empty) and manualFlag as a byte.
*
******************************************************************************************/
ByteStream ProcessManager::buildRequestMessage(ByteStream::byte requestID,
    ByteStream::byte actionIndicator, string processName, bool manualFlag)
{
    ByteStream request;

    request << (ByteStream::byte) REQUEST;
    request << requestID;
    request << actionIndicator;

    // the process name is optional and is skipped when not supplied
    if ( !processName.empty() )
        request << processName;

    request << (ByteStream::byte) manualFlag;

    return request;
}
/******************************************************************************************
* @brief	startModule
*
* purpose:	Ask the module's Process Monitor to start all of its processes
*
* The module is put into MAN_INIT (startType == oam::MAN_OFFLINE) or AUTO_INIT before the
* STARTALL request goes out. On success it becomes ACTIVE and both MODULE_DOWN alarms are
* cleared. On failure it is marked FAILED, except when the status is neither API_FAILURE
* nor API_FAILURE_DB_ERROR and systemStart is set.
*
* @return	status returned by sendMsgProcMon()
*
******************************************************************************************/
int ProcessManager::startModule(string target, messageqcpp::ByteStream::byte actionIndicator, uint16_t startType, bool systemStart)
{
    ByteStream::byte requestID = STARTALL;
    Oam oam;

    // flag the module as initializing, manual or automatic flavor
    setModuleState(target, ( startType == oam::MAN_OFFLINE ) ? oam::MAN_INIT : oam::AUTO_INIT);

    // STARTALL carries no process name
    ByteStream request = buildRequestMessage(requestID, actionIndicator, "");

    int rc = sendMsgProcMon( target, request, requestID );

    if ( rc != API_SUCCESS )
    {
        const bool hardFailure = ( rc == oam::API_FAILURE || rc == API_FAILURE_DB_ERROR );

        if ( hardFailure || !systemStart )
            setModuleState(target, oam::FAILED);

        //log the event
        log.writeLog(__LINE__, target + " module failed to start!!", LOG_TYPE_DEBUG);
        return rc;
    }

    setModuleState(target, oam::ACTIVE);

    //log the event
    log.writeLog(__LINE__, target + " module is started by request.", LOG_TYPE_DEBUG);

    //module is up again: clear both flavors of the module-down alarm
    ALARMManager aManager;
    aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_MANUAL, CLEAR);
    aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_AUTO, CLEAR);

    return rc;
}
/******************************************************************************************
* @brief	stopModule
*
* purpose:	Ask the module's Process Monitor to stop all of its processes
*
* When this is an INSTALL action aimed at the OAM parent module, the module is marked
* offline and the module-down alarm is raised before the request is sent. In every other
* case the module is only moved to the matching INIT state first. Unless the action is
* STATUS_UPDATE, a successful stop raises the module-down alarm (manual or auto flavor).
*
* @return	status returned by sendMsgProcMon()
*
******************************************************************************************/
int ProcessManager::stopModule(string target, ByteStream::byte actionIndicator, bool manualFlag, int timeout)
{
    Configuration config;
    // constructed as in the original implementation even though it is not referenced below
    ProcessManager processManager(config, log);

    ByteStream::byte requestID = STOPALL;

    // STOPALL carries no process name
    ByteStream request = buildRequestMessage(requestID, actionIndicator, "", manualFlag);

    const bool parentInstall = ( actionIndicator == INSTALL && target == config.OAMParentName() );

    log.writeLog(__LINE__, target + " module is stopped by request.", LOG_TYPE_DEBUG);

    if ( parentInstall ) {
        // Process Manager will be taken down, do the updates now
        setModuleState(target, manualFlag ? oam::MAN_OFFLINE : oam::AUTO_OFFLINE);

        //Issue an alarm
        ALARMManager aManager;
        aManager.sendAlarmReport(target.c_str(), manualFlag ? MODULE_DOWN_MANUAL : MODULE_DOWN_AUTO, SET);
    }
    else
    {
        setModuleState(target, manualFlag ? oam::MAN_INIT : oam::AUTO_INIT);
    }

    int rc = sendMsgProcMon( target, request, requestID, timeout );

    // status updates neither log nor alarm on the outcome
    if ( actionIndicator == STATUS_UPDATE )
        return rc;

    if ( rc != API_SUCCESS )
    {
        //log the event
        log.writeLog(__LINE__, target + " module failed to stop!!", LOG_TYPE_WARNING);
        return rc;
    }

    //log the event
    log.writeLog(__LINE__, target + " module is successfully stopped.", LOG_TYPE_DEBUG);

    //Issue an alarm
    ALARMManager aManager;
    aManager.sendAlarmReport(target.c_str(), manualFlag ? MODULE_DOWN_MANUAL : MODULE_DOWN_AUTO, SET);

    return rc;
}
/******************************************************************************************
* @brief	shutdownModule
*
* purpose:	Send a SHUTDOWNMODULE request to the module's Process Monitor
*
* On success the module and all of its processes are marked MAN_OFFLINE or AUTO_OFFLINE
* (depending on manualFlag) and the matching module-down alarm is raised. On failure the
* module is marked FAILED.
*
* @return	status returned by sendMsgProcMon()
*
******************************************************************************************/
int ProcessManager::shutdownModule(string target, ByteStream::byte actionIndicator, bool manualFlag, int timeout)
{
    ByteStream::byte requestID = SHUTDOWNMODULE;

    // SHUTDOWNMODULE carries no process name
    ByteStream request = buildRequestMessage(requestID, actionIndicator, "", manualFlag);

    int rc = sendMsgProcMon( target, request, requestID, timeout );

    if ( rc != API_SUCCESS )
    {
        setModuleState(target, oam::FAILED);

        //log the event
        log.writeLog(__LINE__, target + " module failed to shutdown!!", LOG_TYPE_WARNING);
        return rc;
    }

    //log the event
    log.writeLog(__LINE__, target + " module is shutdown by request.", LOG_TYPE_DEBUG);

    // manual and automatic shutdowns differ only in the state and alarm they use
    const uint16_t offlineState = manualFlag ? oam::MAN_OFFLINE : oam::AUTO_OFFLINE;
    const int downAlarm = manualFlag ? MODULE_DOWN_MANUAL : MODULE_DOWN_AUTO;

    setModuleState(target, offlineState);

    //mark all processes running on the module offline as well
    setProcessStates(target, offlineState);

    //Issue an alarm
    ALARMManager aManager;
    aManager.sendAlarmReport(target.c_str(), downAlarm, SET);

    return rc;
}
/******************************************************************************************
* @brief disableModule
*
* purpose: Set the Disable State on a specified module
*
* @param target      name of the module to disable; its leading MAX_MODULE_TYPE_SIZE
*                    characters are taken as the module type
* @param manualFlag  true selects MAN_DISABLED, false selects AUTO_DISABLED
*
* @return API_SUCCESS when the module is (or already was) disabled,
*         API_FAILURE when updatePMSconfig() or updateWorkerNodeconfig() fails
*
******************************************************************************************/
int ProcessManager::disableModule(string target, bool manualFlag)
{
Oam oam;
ModuleConfig moduleconfig;
log.writeLog(__LINE__, "disableModule request for " + target, LOG_TYPE_DEBUG);
string moduleType = target.substr(0,MAX_MODULE_TYPE_SIZE);
// THREAD_LOCK is held while the current state is inspected; every exit path below
// releases it before calling setModuleState()
pthread_mutex_lock(&THREAD_LOCK);
// numeric module state and the matching DisableState string for the config file
int newState;
string SnewState;
if ( manualFlag ) {
newState = oam::MAN_DISABLED;
SnewState = oam::MANDISABLEDSTATE;
}
else
{
newState = oam::AUTO_DISABLED;
SnewState = oam::AUTODISABLEDSTATE;
}
// skip if module is already in the requested DISABLED state or in MAN_DISABLED state
try{
// defaults to ACTIVE; getModuleStatus() overwrites it on success
int opState = oam::ACTIVE;
bool degraded;
oam.getModuleStatus(target, opState, degraded);
if (opState == newState || opState == oam::MAN_DISABLED) {
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
// if current state is AUTO_DISABLED and new state is MAN_DISABLED
// update state to MAN_DISABLED
if (opState == oam::AUTO_DISABLED && newState == oam::MAN_DISABLED) {
// failures reading or writing the module config are logged only; the module
// state is still switched to MAN_DISABLED and API_SUCCESS is returned
try
{
oam.getSystemConfig(target, moduleconfig);
moduleconfig.DisableState = oam::MANDISABLEDSTATE;
try
{
oam.setSystemConfig(target, moduleconfig);
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on setSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on setSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
pthread_mutex_unlock(&THREAD_LOCK);
setModuleState(target, oam::MAN_DISABLED);
return API_SUCCESS;
}
}
// a failed getModuleStatus() is deliberately silent: the module is then disabled
// as if it had not been in a disabled state
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + target + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + target + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
pthread_mutex_unlock(&THREAD_LOCK);
setModuleState(target, newState);
//set Columnstore.xml enable state
// NOTE(review): the return value of setEnableState() is ignored here, while
// enableModule() checks it - confirm this is intended
setEnableState( target, SnewState);
log.writeLog(__LINE__, "disableModule - setEnableState", LOG_TYPE_DEBUG);
//sleep a bit to give time for the state change to apply
sleep(1);
//update PMS area if PM was disabled
// the early returns below leave the module state and enable state already changed
if ( moduleType == "pm" ) {
if ( updatePMSconfig() != API_SUCCESS )
return API_FAILURE;
log.writeLog(__LINE__, "disableModule - Updated PM server Count", LOG_TYPE_DEBUG);
}
//Update DBRM section of Columnstore.xml
if ( updateWorkerNodeconfig() != API_SUCCESS )
return API_FAILURE;
//distribute config file
distributeConfigFile("system");
log.writeLog(__LINE__, "disableModule successfully complete for " + target, LOG_TYPE_DEBUG);
return API_SUCCESS;
}
/******************************************************************************************
* @brief	recycleProcess
*
* purpose:	recycle processes, generally after a module has been disabled
*
* ExeMgr is always restarted and mysql additionally when the module is a PM. When the
* module is the configured PrimaryUMModuleName, DDLProc and DMLProc are restarted;
* otherwise, for a PM, DDLProc is re-initialized and DMLProc restarted.
*
******************************************************************************************/
void ProcessManager::recycleProcess(string module)
{
    Oam oam;
    ModuleConfig moduleconfig;

    log.writeLog(__LINE__, "recycleProcess request after module was disabled: " + module, LOG_TYPE_DEBUG);

    const bool isPM = ( module.substr(0,MAX_MODULE_TYPE_SIZE) == "pm" );

    // best effort lookup: the name stays empty when the config read fails
    string primaryUM;

    try {
        oam.getSystemConfig("PrimaryUMModuleName", primaryUM);
    }
    catch(...) {}

    //restart ExeMgrs, plus mysql if module is a pm
    restartProcessType("ExeMgr");

    if ( isPM )
        restartProcessType("mysql");

    if ( primaryUM == module )
    {
        // the disabled module was the primary UM, so DDLProc/DMLProc get restarted
        // (presumably so they come up on another UM - confirm against restartProcessType)
        restartProcessType("DDLProc", module);
        sleep(1);
        restartProcessType("DMLProc", module);
    }
    else if ( isPM )
    {
        reinitProcessType("DDLProc");
        sleep(1);
        restartProcessType("DMLProc", module);
    }
}
/******************************************************************************************
* @brief	enableModule
*
* purpose:	Clear the Disable State on a specified module
*
* Marks the module enabled in the configuration, applies the requested module state,
* refreshes the PM server and DBRM worker node sections, distributes the config file and
* finally promotes the module to hot-standby when getStandbyModule() selects it.
*
* @return	API_SUCCESS, or API_FAILURE when setEnableState(), updatePMSconfig() or
*		updateWorkerNodeconfig() fails
*
******************************************************************************************/
int ProcessManager::enableModule(string target, int state)
{
    Oam oam;
    ModuleConfig moduleconfig;

    log.writeLog(__LINE__, "enableModule request for " + target, LOG_TYPE_DEBUG);

    const bool isPM = ( target.substr(0,MAX_MODULE_TYPE_SIZE) == "pm" );

    if ( setEnableState( target, oam::ENABLEDSTATE) != API_SUCCESS )
        return API_FAILURE;

    setModuleState(target, state);

    //give the state change some time to apply
    sleep(5);

    //a re-enabled PM changes the PM server area
    if ( isPM )
    {
        if ( updatePMSconfig() != API_SUCCESS )
            return API_FAILURE;

        log.writeLog(__LINE__, "enableModule - Updated PM server Count", LOG_TYPE_DEBUG);
    }

    //Update DBRM section of Columnstore.xml
    if ( updateWorkerNodeconfig() != API_SUCCESS )
        return API_FAILURE;

    //distribute config file
    distributeConfigFile("system");

    //check if the enabled module should become the hot-standby
    const string standbyCandidate = getStandbyModule();

    if ( standbyCandidate == target )
        setStandbyModule(standbyCandidate);

    log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG);

    return API_SUCCESS;
}
/******************************************************************************************
* @brief startMgrProcesses
*
* purpose: start all Mgr Controlled processes for a module
*
* Walks the process configuration and sends a forceful START request to the module's
* Process Monitor for every MGR_LAUNCH process of this module that is still in the
* INITIAL state. The walk is repeated until a pass ends with 'status' still true.
*
* @param moduleName  module whose processes are started; its leading
*                    MAX_MODULE_TYPE_SIZE characters are taken as the module type
*
******************************************************************************************/
void ProcessManager::startMgrProcesses(std::string moduleName)
{
Oam oam;
SystemProcessConfig systemprocessconfig;
vector<ProcessConfig>::iterator itor;
ByteStream msg;
// message queue name of the Process Monitor on the target module
string modulePortName = moduleName + "_ProcessMonitor";
// a failed config read is only logged; the loop below then runs over whatever
// systemprocessconfig holds at that point
try{
oam.getProcessConfig(systemprocessconfig);
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
// repeat full passes over the process list; there is no delay between passes
while(true)
{
bool status = true;
for (itor=systemprocessconfig.processconfig.begin();
itor != systemprocessconfig.processconfig.end(); ++itor)
{
// NOTE(review): status is reset for every entry, so only the last entry of the
// list decides whether another pass is made - confirm whether this is intended
status = true;
if ((*itor).BootLaunch == MGR_LAUNCH)
{
// process applies to this module: same module type, any child OAM module,
// or a parent-only process when this module is the OAM parent
if ((*itor).ModuleType == moduleType
|| (*itor).ModuleType == "ChildExtOAMModule"
|| ( (*itor).ModuleType == "ChildOAMModule")
|| ((*itor).ModuleType == "ParentOAMModule" && moduleName == config.OAMParentName()) )
{
int state = oam::ACTIVE;
// processes whose status cannot be read are skipped silently
try{
ProcessStatus procstat;
oam.getProcessStatus((*itor).ProcessName, moduleName, procstat);
state = procstat.ProcessOpState;
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
continue;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
continue;
}
// only processes still in INITIAL state get a start request
if ( state == oam::INITIAL ) {
msg = buildRequestMessage(START, FORCEFUL, (*itor).ProcessName);
log.writeLog(__LINE__, "Request Start of Process/Module: " + (*itor).ProcessName + " / " + moduleName, LOG_TYPE_DEBUG);
// the request is sent without waiting for a reply; a send failure is
// swallowed and leaves status true for this entry
try{
MessageQueueClient mqRequest(modulePortName);
mqRequest.write(msg);
mqRequest.shutdown();
// sleep(2);
// a request was sent: ask for another pass
status = false;
}
catch (exception& ex)
{
string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
}
}
} //end of for loop
// done once a pass finishes with status still true
if (status)
return;
} //end of while
}
/******************************************************************************************
* @brief	stopProcess
*
* purpose:	Send a STOP request for one process to the module's Process Monitor
*
* @return	status returned by sendMsgProcMon()
*
******************************************************************************************/
int ProcessManager::stopProcess(string moduleName, string processName,
    messageqcpp::ByteStream::byte actionIndicator, bool manualFlag, int timeout)
{
    ByteStream::byte requestID = STOP;
    ByteStream request = buildRequestMessage(requestID, actionIndicator, processName, manualFlag);

    int rc = sendMsgProcMon( moduleName, request, requestID, timeout );

    //log the outcome
    if ( rc != API_SUCCESS )
    {
        log.writeLog(__LINE__, processName + " process failed to stop!!", LOG_TYPE_WARNING);
        return rc;
    }

    log.writeLog(__LINE__, processName + " process is stopped by request.", LOG_TYPE_DEBUG);
    return rc;
}
/******************************************************************************************
* @brief	startProcess
*
* purpose:	Start a Process on the specified module
*
* @param moduleName       module hosting the process
* @param processName      process to start
* @param actionIndicator  action indicator forwarded in the START request
*
* @return	API_SUCCESS without sending anything when the module is MAN_DISABLED or
*		AUTO_DISABLED, otherwise the status returned by sendMsgProcMon()
*
******************************************************************************************/
int ProcessManager::startProcess(string moduleName, string processName,
    messageqcpp::ByteStream::byte actionIndicator)
{
    Oam oam;

    //skip if module is DISABLED
    // Start from ACTIVE so that a failed status lookup (the exception is swallowed
    // below) is treated as "not disabled" instead of reading an indeterminate value.
    int opState = oam::ACTIVE;
    bool degraded = false;

    try{
        oam.getModuleStatus(moduleName, opState, degraded);
    }
    catch(...)
    {
        // best effort: fall through with the default state
    }

    //check if disabled
    if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
        return API_SUCCESS;

    ByteStream msg;
    ByteStream::byte requestID = START;

    msg = buildRequestMessage(requestID, actionIndicator, processName);

    int returnStatus = sendMsgProcMon( moduleName, msg, requestID );

    //log the event
    if ( returnStatus == API_SUCCESS)
        log.writeLog(__LINE__, moduleName + "/" + processName + " process is started by request.", LOG_TYPE_DEBUG);
    else
        log.writeLog(__LINE__, moduleName + "/" + processName + " process failed to start!!", LOG_TYPE_WARNING);

    return returnStatus;
}
/******************************************************************************************
* @brief	restartProcess
*
* purpose:	Restart a Process on the specified module
*
* The RESTART request is attempted up to 5 times, 2 seconds apart, because the
* dependent-process checks can make early attempts fail.
*
* @param moduleName       module hosting the process
* @param processName      process to restart
* @param actionIndicator  action indicator forwarded in the RESTART request
* @param manualFlag       forwarded in the request message
*
* @return	API_SUCCESS without sending anything when the module is MAN_DISABLED or
*		AUTO_DISABLED, otherwise the status of the last sendMsgProcMon() attempt
*
******************************************************************************************/
int ProcessManager::restartProcess(string moduleName, string processName,
    messageqcpp::ByteStream::byte actionIndicator, bool manualFlag)
{
    Oam oam;

    //skip if module is DISABLED
    // Start from ACTIVE so that a failed status lookup (the exception is swallowed
    // below) is treated as "not disabled" instead of reading an indeterminate value.
    int opState = oam::ACTIVE;
    bool degraded = false;

    try{
        oam.getModuleStatus(moduleName, opState, degraded);
    }
    catch(...)
    {
        // best effort: fall through with the default state
    }

    //check if disabled
    if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
        return API_SUCCESS;

    ByteStream msg;
    ByteStream::byte requestID = RESTART;

    msg = buildRequestMessage(requestID, actionIndicator, processName, manualFlag);

    // always overwritten by the first attempt; initialized so it never holds an
    // indeterminate value
    int returnStatus = API_FAILURE;

    // need retry due to the depend process checks
    for ( int retry = 0 ; retry < 5 ; retry++)
    {
        returnStatus = sendMsgProcMon( moduleName, msg, requestID );

        if ( returnStatus == API_SUCCESS)
        {
            log.writeLog(__LINE__, processName + " process is restarted by request.", LOG_TYPE_DEBUG);
            return returnStatus;
        }

        log.writeLog(__LINE__, processName + " process failed to restart, will retry!!", LOG_TYPE_WARNING);

        sleep(2);
    }

    return returnStatus;
}
/******************************************************************************************
* @brief	reinitProcess
*
* purpose:	Reinit a Process on the specified module
*
* Sends a forceful PROCREINITPROCESS request with a timeout argument of 0.
*
* @param moduleName   module hosting the process
* @param processName  process to re-initialize
*
* @return	API_SUCCESS without sending anything when the module is MAN_DISABLED or
*		AUTO_DISABLED, otherwise the status returned by sendMsgProcMon()
*
******************************************************************************************/
int ProcessManager::reinitProcess(string moduleName, string processName)
{
    Oam oam;

    //skip if module is DISABLED
    // Start from ACTIVE so that a failed status lookup (the exception is swallowed
    // below) is treated as "not disabled" instead of reading an indeterminate value.
    int opState = oam::ACTIVE;
    bool degraded = false;

    try{
        oam.getModuleStatus(moduleName, opState, degraded);
    }
    catch(...)
    {
        // best effort: fall through with the default state
    }

    //check if disabled
    if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
        return API_SUCCESS;

    ByteStream msg;
    ByteStream::byte requestID = PROCREINITPROCESS;
    ByteStream::byte actionIndicator = FORCEFUL;

    msg = buildRequestMessage(requestID, actionIndicator, processName);

    int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 0 );

    //log the event
    if ( returnStatus == API_SUCCESS)
        log.writeLog(__LINE__, processName + " process is reinited by request.", LOG_TYPE_DEBUG);
    else
        log.writeLog(__LINE__, processName + " process failed to reinit!!", LOG_TYPE_WARNING);

    return returnStatus;
}
/******************************************************************************************
* @brief setSystemState
*
* purpose: set System State and process required alarms
*
******************************************************************************************/
void ProcessManager::setSystemState(uint16_t state)
{
ProcessLog log;
Oam oam;
ALARMManager aManager;
Configuration config;
log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG);
pthread_mutex_lock(&STATUS_LOCK);
try{
oam.setSystemStatus(state);
}
catch (exception& ex)
{
string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR);
}
// Process Alarms
string system = "System";
if( state == oam::ACTIVE ) {
//clear alarms if set
aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR);
aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR);
}
else {
if( state == oam::MAN_OFFLINE )
aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, SET);
else
if ( state == oam::AUTO_OFFLINE )
aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, SET);
//this alarm doesnt get clear by reporter, so clear on stopage
aManager.sendAlarmReport(system.c_str(), CONN_FAILURE, CLEAR);
}
pthread_mutex_unlock(&STATUS_LOCK);
}
/******************************************************************************************
* @brief	setModuleState
*
* purpose:	set Module State of a specific module
*
* The update is made through Oam::setModuleStatus() while holding STATUS_LOCK; any
* exception it raises is swallowed.
*
******************************************************************************************/
void ProcessManager::setModuleState(string moduleName, uint16_t state)
{
    ProcessLog stateLog;
    Oam oamApi;

    stateLog.writeLog(__LINE__, "Set Module " + moduleName + " State = " + oamApi.itoa(state), LOG_TYPE_DEBUG);

    pthread_mutex_lock(&STATUS_LOCK);

    try{
        oamApi.setModuleStatus(moduleName, state);
    }
    catch(...)
    {
        // failures are ignored on purpose
    }

    pthread_mutex_unlock(&STATUS_LOCK);
}
/******************************************************************************************
* @brief setExtdeviceState
*
* purpose: set the State of a specific external device
*
******************************************************************************************/
void ProcessManager::setExtdeviceState(string extDeviceName, uint16_t state)
{
    // Stores 'state' as the status of external device 'extDeviceName' through
    // the OAM API.
    //
    // @param extDeviceName  external device whose status is updated
    // @param state          new device state
    //
    // The update runs under STATUS_LOCK and is best-effort: any exception
    // raised by setExtDeviceStatus() is swallowed.
    ProcessLog log;
    Oam oam;

    const string logMsg = "Set Ext Device " + extDeviceName + " State = " + oam.itoa(state);
    log.writeLog(__LINE__, logMsg, LOG_TYPE_DEBUG);

    pthread_mutex_lock(&STATUS_LOCK);

    try
    {
        oam.setExtDeviceStatus(extDeviceName, state);
    }
    catch (...)
    {
        // ignored: failures are not reported to the caller
    }

    pthread_mutex_unlock(&STATUS_LOCK);
}
/******************************************************************************************
* @brief setNICState
*
* purpose: set NIC State of a specific host
*
******************************************************************************************/
void ProcessManager::setNICState(string hostName, uint16_t state)
{
    // Stores 'state' as the NIC status for 'hostName' through the OAM API.
    //
    // @param hostName  host name identifying the NIC
    // @param state     new NIC state
    //
    // The update runs under STATUS_LOCK and is best-effort: any exception
    // raised by setNICStatus() is swallowed.
    ProcessLog log;
    Oam oam;

    const string logMsg = "Set NIC " + hostName + " State = " + oam.itoa(state);
    log.writeLog(__LINE__, logMsg, LOG_TYPE_DEBUG);

    pthread_mutex_lock(&STATUS_LOCK);

    try
    {
        oam.setNICStatus(hostName, state);
    }
    catch (...)
    {
        // ignored: failures are not reported to the caller
    }

    pthread_mutex_unlock(&STATUS_LOCK);
}
/******************************************************************************************
* @brief setProcessState
*
* purpose: set Process State of a specific Process
*
******************************************************************************************/
int ProcessManager::setProcessState(string moduleName, string processName, uint16_t state, pid_t PID)
{
    // Stores the state and PID of one process on one module through the OAM
    // API.
    //
    // @param moduleName   module the process belongs to
    // @param processName  process whose status is updated
    // @param state        new process state
    // @param PID          process id stored with the status
    // @return oam::API_SUCCESS, or oam::API_FAILURE if setProcessStatus()
    //         throws anything
    //
    // Unlike the module/NIC/ext-device setters in this file, this function
    // does not take STATUS_LOCK.
    ProcessLog log;
    Oam oam;

    const string logMsg = "StatusUpdate of Process " + processName + " State = " + oam.itoa(state);
    log.writeLog(__LINE__, logMsg, LOG_TYPE_DEBUG);

    int result = oam::API_SUCCESS;

    try
    {
        oam.setProcessStatus(processName, moduleName, state, PID);
    }
    catch (...)
    {
        result = oam::API_FAILURE;
    }

    return result;
}
/******************************************************************************************
* @brief setProcessStates
*
* purpose: set all processes running on a module to requested state
*
******************************************************************************************/
void ProcessManager::setProcessStates(std::string moduleName, uint16_t state, std::string processNameSkip )
{
ProcessLog log;
Oam oam;
log.writeLog(__LINE__, "Set All NON-MAN_OFFLINE Process for module " + moduleName + " = " + oam.itoa(state), LOG_TYPE_DEBUG);
SystemProcessConfig systemprocessconfig;
vector<ProcessConfig>::iterator itor;
//PMwithUM config
string PMwithUM = "n";
try {
oam.getSystemConfig( "PMwithUM", PMwithUM);
}
catch(...) {
PMwithUM = "n";
}
string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
try{
oam.getProcessConfig(systemprocessconfig);
}
catch (exception& ex)
{
string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
string moduleTypeSet = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
for (itor=systemprocessconfig.processconfig.begin();
itor != systemprocessconfig.processconfig.end(); ++itor)
{
if ( (*itor).ModuleType == moduleType
|| (*itor).ModuleType == "ChildExtOAMModule"
|| ( (*itor).ModuleType == "ChildOAMModule" )
|| ((*itor).ModuleType == "ParentOAMModule") )
{
if ( (*itor).ProcessName == processNameSkip )
continue;
ProcessStatus processstatus;
try {
oam.getProcessStatus((*itor).ProcessName, moduleName, processstatus);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
}
if (processstatus.ProcessOpState != oam::MAN_OFFLINE) {
setProcessState(moduleName, (*itor).ProcessName, state, 0);
if ( (*itor).ProcessName == "ExeMgr" || state == oam::AUTO_OFFLINE )
setProcessState(moduleName, "mysqld", state, 0);
}
}
else
{ //for for umwithpm apps, which is ExeMgr now
if ( moduleTypeSet == "pm" && PMwithUM == "y" )
{
ProcessStatus processstatus;
try {
oam.getProcessStatus("ExeMgr", moduleName, processstatus);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
}
if (processstatus.ProcessOpState != oam::MAN_OFFLINE) {
setProcessState(moduleName, "ExeMgr", state, 0);
if ( state == oam::AUTO_OFFLINE )
setProcessState(moduleName, "mysqld", state, 0);
}
}
}
}
return;
}
/******************************************************************************************
* @brief updateLog
*
* purpose: send a log update request (action and level) to a specific module
*
******************************************************************************************/
int ProcessManager::updateLog(std::string action, std::string moduleName, std::string level)
{
ByteStream msg;
ByteStream::byte requestID = PROCUPDATELOG;
msg << requestID;
msg << action;
msg << level;
int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 );
if ( returnStatus == API_SUCCESS)
{
//log the success event
log.writeLog(__LINE__, moduleName + " updateLog by request.", LOG_TYPE_DEBUG);
}
else
{
//log the error event
log.writeLog(__LINE__, moduleName + " updateLog failed!!", LOG_TYPE_WARNING);
}
return returnStatus;
}
/******************************************************************************************
* @brief getConfigLog
*
* purpose: get Log Configuration on a specific module
*
******************************************************************************************/
int ProcessManager::getConfigLog(std::string moduleName)
{
ByteStream msg;
ByteStream::byte requestID = PROCGETCONFIGLOG;
msg << requestID;
int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 );
return returnStatus;
}
/******************************************************************************************
* @brief updateConfig
*
* purpose: Send Msg to Process-Monitor to re-read updated Configuration data
*
******************************************************************************************/
int ProcessManager::updateConfig(std::string moduleName)
{
ByteStream msg;
ByteStream::byte requestID = PROCUPDATECONFIG;
msg << requestID;
int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 );
return returnStatus;
}
/******************************************************************************************
* @brief buildSystemTables
*
* purpose: Send a Message to the target module to check and build System Tables
*
******************************************************************************************/
int ProcessManager::buildSystemTables(string target)
{
    // Sends a PROCBUILDSYSTEMTABLES request to 'target' via sendMsgProcMon().
    // No fourth argument is passed, so sendMsgProcMon()'s default applies.
    //
    // @param target  module the request is sent to
    // @return the status returned by sendMsgProcMon()
    ByteStream::byte requestID = PROCBUILDSYSTEMTABLES;

    ByteStream msg;
    msg << requestID;

    return sendMsgProcMon( target, msg, requestID );
}
/******************************************************************************************
* @brief updateFstab
*
* purpose: send Fstab Update to a specific module
*
******************************************************************************************/
int ProcessManager::updateFstab(std::string moduleName, std::string entry)
{
ByteStream msg;
ByteStream::byte requestID = PROCFSTABUPDATE;
msg << requestID;
msg << entry;
int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 );
return returnStatus;
}
/******************************************************************************************
* @brief stopProcessType
*
* purpose: Stops a type of process within the system
*
******************************************************************************************/
int ProcessManager::stopProcessType( std::string processName, bool manualFlag )
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
SystemProcessStatus systemprocessstatus;
ProcessStatus processstatus;
log.writeLog(__LINE__, "stopProcessType: Stop all " + processName, LOG_TYPE_DEBUG);
try {
oam.getProcessStatus(systemprocessstatus);
for( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{
if ( systemprocessstatus.processstatus[i].ProcessName == processName) {
// found one, request restart of it
processManager.stopProcess(systemprocessstatus.processstatus[i].Module,
processName,
GRACEFUL,
manualFlag, 0);
// log.writeLog(__LINE__, "stopProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
}
}
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
return API_FAILURE;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
return API_FAILURE;
}
return API_SUCCESS;
}
/******************************************************************************************
* @brief startProcessType
*
* purpose: Starts a type of process within the system
*
******************************************************************************************/
int ProcessManager::startProcessType( std::string processName )
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
SystemProcessStatus systemprocessstatus;
ProcessStatus processstatus;
log.writeLog(__LINE__, "StartProcessType: Start all " + processName, LOG_TYPE_DEBUG);
try
{
oam.getProcessStatus(systemprocessstatus);
for( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{
if ( systemprocessstatus.processstatus[i].ProcessName == processName) {
// found one, request restart of it
int retStatus = processManager.startProcess(systemprocessstatus.processstatus[i].Module,
processName,
FORCEFUL);
log.writeLog(__LINE__, "StartProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
}
}
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
return API_FAILURE;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
return API_FAILURE;
}
return API_SUCCESS;
}
/******************************************************************************************
* @brief restartProcessType
*
* purpose: Restarts ACTIVE type of process within the system
*
******************************************************************************************/
int ProcessManager::restartProcessType( std::string processName, std::string skipModule, bool manualFlag )
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
SystemProcessStatus systemprocessstatus;
ProcessStatus processstatus;
int retStatus = API_SUCCESS;
log.writeLog(__LINE__, "restartProcessType: Restart all " + processName, LOG_TYPE_DEBUG);
//PMwithUM config
string PMwithUM = "n";
try {
oam.getSystemConfig( "PMwithUM", PMwithUM);
}
catch(...) {
PMwithUM = "n";
}
// If mysql is the processName, then send to modules were ExeMgr is running
try
{
oam.getProcessStatus(systemprocessstatus);
for( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{
//check for skipModule
if ( systemprocessstatus.processstatus[i].Module == skipModule )
continue;
if ( processName == "mysql" ) {
if ( systemprocessstatus.processstatus[i].ProcessName == "ExeMgr") {
ProcessStatus procstat;
oam.getProcessStatus("mysqld", systemprocessstatus.processstatus[i].Module, procstat);
int state = procstat.ProcessOpState;
if ( state == ACTIVE ) {
retStatus = processManager.restartProcess(systemprocessstatus.processstatus[i].Module,
processName,
FORCEFUL,
true);
log.writeLog(__LINE__, "restartProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
}
}
}
else
{
if ( systemprocessstatus.processstatus[i].ProcessName == processName ) {
//skip if in a BUSY_INIT state
if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::BUSY_INIT ||
systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_INIT ||
systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_INIT ||
( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) )
continue;
if( processName.find("DDLProc") == 0 ||
processName.find("DMLProc") == 0 ) {
string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0,MAX_MODULE_TYPE_SIZE);
if ( procModuleType == "pm" && PMwithUM == "y" )
continue;
try {
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module);
processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module);
//distribute config file
processManager.distributeConfigFile("system");
sleep(1);
}
catch(...) {}
}
// found one, request restart of it
retStatus = processManager.restartProcess(systemprocessstatus.processstatus[i].Module,
processName,
FORCEFUL,
true);
log.writeLog(__LINE__, "restartProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
// if DDL or DMLProc, change IP Address
if ( retStatus == oam::API_SUCCESS )
{
if( processName.find("DDLProc") == 0 ||
processName.find("DMLProc") == 0 ) {
processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module, processName);
return retStatus;
}
}
}
}
}
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
return API_FAILURE;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
return API_FAILURE;
}
return retStatus;
}
/******************************************************************************************
* @brief reinitProcessType
*
* purpose: Reinit ACTIVE type of process within the system
*
******************************************************************************************/
int ProcessManager::reinitProcessType( std::string processName )
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
SystemProcessStatus systemprocessstatus;
ProcessStatus processstatus;
int retStatus = API_SUCCESS;
log.writeLog(__LINE__, "reinitProcessType: ReInit all " + processName, LOG_TYPE_DEBUG);
try
{
oam.getProcessStatus(systemprocessstatus);
// re-init cpimport on all nodes
if ( processName == "cpimport" ) {
for( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{
if ( systemprocessstatus.processstatus[i].ProcessName == "ServerMonitor" ) {
// found one, request reinit of it
retStatus = processManager.reinitProcess(systemprocessstatus.processstatus[i].Module,
"cpimport");
log.writeLog(__LINE__, "reinitProcessType: ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
}
}
}
else
{
for( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{
if ( systemprocessstatus.processstatus[i].ProcessName == processName &&
systemprocessstatus.processstatus[i].ProcessOpState == oam::ACTIVE ) {
// found one, request reinit of it
retStatus = processManager.reinitProcess(systemprocessstatus.processstatus[i].Module,
processName);
log.writeLog(__LINE__, "reinitProcessType: ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
}
}
}
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
return API_FAILURE;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
return API_FAILURE;
}
return retStatus;
}
/******************************************************************************************
* @brief addModule
*
* purpose: Add Module to system configuration
*
******************************************************************************************/
int ProcessManager::addModule(oam::DeviceNetworkList devicenetworklist, std::string password, bool manualFlag)
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
SystemModuleTypeConfig systemmoduletypeconfig;
ModuleTypeConfig moduletypeconfig;
ModuleTypeConfig setmoduletypeconfig;
DeviceNetworkConfig devicenetworkconfig;
Oam oam;
string Section;
string installDir = startup::StartUp::installDir();
pthread_mutex_lock(&THREAD_LOCK);
int AddModuleCount = devicenetworklist.size();
DeviceNetworkList::iterator listPT = devicenetworklist.begin();
string moduleType = (*listPT).DeviceName.substr(0,MAX_MODULE_TYPE_SIZE);
//
//Check hostname and IP Address for availibility
//
try
{
oam.getSystemConfig(systemmoduletypeconfig);
for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
{
if( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() )
// end of list
break;
int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType;
if ( moduleCount > 0 )
{
DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
{
HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
for( ; pt1 != (*pt).hostConfigList.end() ; pt1++)
{
string hostname = (*pt1).HostName;
if ( hostname == oam::UnassignedName )
continue;
string ipAddr = (*pt1).IPAddr;
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
string newHostName = (*pt1).HostName;
string newIPAddr = (*pt1).IPAddr;
if ( newIPAddr == ipAddr || newHostName == hostname ) {
log.writeLog(__LINE__, "addModule - ERROR: hostName or IP address already in-use: " + newIPAddr + "/" + newHostName, LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_INVALID_PARAMETER;
}
}
}
}
}
}
}
catch (exception& e)
{
log.writeLog(__LINE__, "addModule - ERROR: getSystemConfig", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
string calpontPackage;
string systemID;
string packageType = "rpm";
try
{
oam.getSystemConfig("EEPackageType", packageType);
}
catch (...)
{
log.writeLog(__LINE__, "addModule - ERROR: get EEPackageType", LOG_TYPE_ERROR);
}
//
// check for RPM package
//
SystemSoftware systemsoftware;
try
{
oam.getSystemSoftware(systemsoftware);
}
catch (exception& e)
{
log.writeLog(__LINE__, "addModule - ERROR: getSystemSoftware", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
//check if pkgs are located in /root directory
string homedir = "/root";
if (!rootUser) {
char* p= getenv("HOME");
if (p && *p)
homedir = p;
}
if ( packageType == "rpm")
calpontPackage = homedir + "/mariadb-columnstore*" + systemsoftware.Version + "-" + systemsoftware.Release + "*.rpm.tar.gz";
else
if ( packageType == "deb")
calpontPackage = homedir + "/mariadb-columnstore*" + systemsoftware.Version + "-" + systemsoftware.Release + "*.deb.tar.gz";
else
calpontPackage = homedir + "/mariadb-columnstore*" + systemsoftware.Version + "-" + systemsoftware.Release + "*.bin.tar.gz";
string cmd = "ls " + calpontPackage + " > /dev/null 2>&1";
int rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 0) {
log.writeLog(__LINE__, "addModule - ERROR: Package not found: " + calpontPackage, LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FILE_OPEN_ERROR;
}
log.writeLog(__LINE__, "addModule - Columnstore Package found:" + calpontPackage, LOG_TYPE_DEBUG);
//
// Verify Host IP and Password
//
if ( password == "ssh" && amazon )
{ // check if there is a root password stored
string rpw = oam::UnassignedName;
try
{
oam.getSystemConfig("rpw", rpw);
}
catch(...)
{
rpw = "mariadb1";
}
if (rpw != oam::UnassignedName)
password = rpw;
}
if ( amazon ) {
//remove know_host which shows up if you addmodule/removemodule/addmodule
string file = homedir + "/.ssh/known_hosts";
unlink (file.c_str());
}
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
string newHostName = (*pt1).HostName;
if ( newHostName == oam::UnassignedName )
continue;
string newIPAddr = (*pt1).IPAddr;
string cmd = installDir + "/bin/remote_command.sh " + newIPAddr + " " + password + " ls";
log.writeLog(__LINE__, cmd, LOG_TYPE_DEBUG);
int rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 0) {
log.writeLog(__LINE__, "addModule - ERROR: Remote login test failed, Invalid IP / Password " + newIPAddr, LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
log.writeLog(__LINE__, "addModule - Remote login test successful: " + newIPAddr, LOG_TYPE_DEBUG);
}
//
//Get System Configuration file
//
try{
oam.getSystemConfig(moduleType, moduletypeconfig);
}
catch(...)
{
log.writeLog(__LINE__, "addModule - ERROR: getSystemConfig", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
setmoduletypeconfig = moduletypeconfig;
// update Module Type Count
int oldModuleCount = moduletypeconfig.ModuleCount;
int newModuleCount = oldModuleCount + AddModuleCount;
setmoduletypeconfig.ModuleCount = newModuleCount;
//add new IP Addresses and Hostnames
listPT = devicenetworklist.begin();
HostConfig hostconfig;
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string moduleName = (*listPT).DeviceName;
devicenetworkconfig.DeviceName = (*listPT).DeviceName;
devicenetworkconfig.DisableState = oam::MANDISABLEDSTATE;
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
for( ; pt1 != (*listPT).hostConfigList.end() ; pt1++)
{
string hostName = (*pt1).HostName;
string IPAddr = (*pt1).IPAddr;
//if cloud and unassigned, launch a new Instance
if ( ( cloud == "amazon-ec2" && hostName == oam::UnassignedName ) ||
( cloud == "amazon-vpc" && hostName == oam::UnassignedName ) )
{
string UMinstanceType;
string UMSecurityGroup;
if ( moduleType == "um")
{
try{
oam.getSystemConfig("UMInstanceType", UMinstanceType);
oam.getSystemConfig("UMSecurityGroup", UMSecurityGroup);
}
catch(...) {}
}
log.writeLog(__LINE__, "addModule - Launching a new Instance for: " + moduleName, LOG_TYPE_DEBUG);
if ( moduleType == "um" )
hostName = oam.launchEC2Instance(moduleName, IPAddr, UMinstanceType, UMSecurityGroup);
else
hostName = oam.launchEC2Instance(moduleName, IPAddr);
if ( hostName == "failed" ) {
log.writeLog(__LINE__, "addModule - Launch New Instance Failure", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
// add instance tag
string systemName;
string AmazonAutoTagging;
{
try{
oam.getSystemConfig("SystemName", systemName);
oam.getSystemConfig("AmazonAutoTagging", AmazonAutoTagging);
}
catch(...) {}
}
if ( AmazonAutoTagging == "y" )
{
string tagValue = systemName + "-" + moduleName;
oam.createEC2tag( hostName, "Name", tagValue );
}
//wait until login is success until continuing or fail if can't login
log.writeLog(__LINE__, "addModule - Successfully Launch of new Instance, retry login test: " + moduleName, LOG_TYPE_DEBUG);
int retry = 0;
for ( ; retry < 18 ; retry++)
{
IPAddr = oam.getEC2InstanceIpAddress(hostName);
if (IPAddr == "terminated") {
log.writeLog(__LINE__, "addModule - Failed to log in to Instance, it was terminated: " + hostName, LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
if (IPAddr == "stopped") {
sleep(10);
continue;
}
string cmd = installDir + "/bin/remote_command.sh " + IPAddr + " " + password + " 'ls' 1 > /tmp/login_test.log";
system(cmd.c_str());
if (!oam.checkLogStatus("/tmp/login_test.log", "README")) {
log.writeLog(__LINE__, "addModule - login failed, retry login test: " + moduleName, LOG_TYPE_DEBUG);
sleep(10);
continue;
}
// logged in
break;
}
if ( retry >= 18 )
{
log.writeLog(__LINE__, "addModule - Failed to log in to Instance: " + hostName, LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
log.writeLog(__LINE__, "addModule - Successful loggin: " + hostName, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "addModule - Launched new Instance: " + hostName + "/" + IPAddr, LOG_TYPE_DEBUG);
(*pt1).HostName = hostName;
(*pt1).IPAddr = IPAddr;
//check if any volumes need to be attached
if ( moduleType == "um" )
{
string UMStorageType = "internal";
{
try{
oam.getSystemConfig("UMStorageType", UMStorageType);
}
catch(...) {}
}
if ( UMStorageType == "external" )
{ //check if volume already assigned or need to create a new one
int moduleID = atoi((*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE).c_str());
string volumeNameID = "UMVolumeName" + oam.itoa(moduleID);
string volumeName = oam::UnassignedName;
string deviceNameID = "UMVolumeDeviceName" + oam.itoa(moduleID);
string deviceName = oam::UnassignedName;
try {
oam.getSystemConfig( volumeNameID, volumeName);
oam.getSystemConfig( deviceNameID, deviceName);
}
catch(...)
{}
if ( volumeName.empty() || volumeName == oam::UnassignedName ) {
// need to create a new one
string device;
try{
oam.addUMdisk(moduleID, volumeName, device);
}
catch(...) {
log.writeLog(__LINE__, "addModule: volume create failed for um: " + moduleName, LOG_TYPE_CRITICAL);
pthread_mutex_unlock(&THREAD_LOCK);
}
//attach to UM
log.writeLog(__LINE__, "addModule - attach new Volume to " + moduleName, LOG_TYPE_DEBUG);
if (!oam.attachEC2Volume(volumeName, device, hostName)) {
log.writeLog(__LINE__, "addModule: volume failed to attach to um: " + moduleName, LOG_TYPE_CRITICAL);
pthread_mutex_unlock(&THREAD_LOCK);
}
try {
Config* sysConfig = Config::makeConfig();
sysConfig->setConfig("Installation", volumeNameID, volumeName);
sysConfig->setConfig("Installation", deviceNameID, device);
sysConfig->write();
}
catch(...)
{}
log.writeLog(__LINE__, "addModule - create/attach new volume: " + volumeName + "/" + device, LOG_TYPE_DEBUG);
}
else
{ // one exist, detach and reattach it
oam.detachEC2Volume( volumeName );
if (!oam.attachEC2Volume(volumeName, deviceName, hostName)) {
log.writeLog(__LINE__, "addModule: volume failed to attached: " + volumeName, LOG_TYPE_CRITICAL);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
log.writeLog(__LINE__, "addModule - attach existing volume: " + volumeName + "/" + deviceName, LOG_TYPE_DEBUG);
}
}
}
}
hostconfig.HostName = hostName;
hostconfig.IPAddr = IPAddr;
hostconfig.NicID = (*pt1).NicID;
devicenetworkconfig.hostConfigList.push_back(hostconfig);
}
setmoduletypeconfig.ModuleNetworkList.push_back(devicenetworkconfig);
}
Config* sysConfig = Config::makeConfig();
//Add additional Process Ports
// all nodes: ProcessMonitor, ServerMonitor
// dm: NONE
// um: ExeMgr
// pm: NONE
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
Section = (*listPT).DeviceName + "_ProcessMonitor";
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr);
sysConfig->setConfig(Section, "Port", "8800");
Section = (*listPT).DeviceName + "_ServerMonitor";
sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr);
sysConfig->setConfig(Section, "Port", "8622");
}
if ( moduleType == "um" ||
( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) ||
( moduleType == "pm" && PMwithUM == "y") ) {
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
int moduleID = atoi((*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE).c_str());
int exemgrID = moduleID;
if ( PMwithUM == "y" )
{ // then go check for next available ID
exemgrID = 0;
for ( int id = 2 ; ; id++ )
{
string Section = "ExeMgr" + oam.itoa(id);
string moduleName;
try {
Config* sysConfig = Config::makeConfig();
moduleName = sysConfig->getConfig(Section, "Module");
}
catch (...) {}
if ( moduleName.empty() )
{
exemgrID = id;
break;
}
}
}
Section = "ExeMgr" + oam.itoa(exemgrID);
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr);
sysConfig->setConfig(Section, "Port", "8601");
sysConfig->setConfig(Section, "Module", (*listPT).DeviceName);
}
}
if ( moduleType == "pm" ) {
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
Section = (*listPT).DeviceName + "_WriteEngineServer";
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr);
sysConfig->setConfig(Section, "Port", "8630");
}
}
log.writeLog(__LINE__, "addModule - Updated Process Ports", LOG_TYPE_DEBUG);
string parentOAMModuleHostName;
string parentOAMModuleIPAddr;
//setup dbroot entries
if (moduleType == "pm" && manualFlag)
{
const string MODULE_DBROOTID = "ModuleDBRootID";
const string MODULE_DBROOT_COUNT = "ModuleDBRootCount";
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string moduleID = (*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE);
string ModuleDBRootCount = MODULE_DBROOT_COUNT + moduleID + "-3";
sysConfig->setConfig("SystemModuleConfig", ModuleDBRootCount, "0");
string ModuleDBrootID = MODULE_DBROOTID + moduleID + "-1-3";
sysConfig->setConfig("SystemModuleConfig", ModuleDBrootID, oam::UnassignedName);
}
}
//update Columnstore Config table
try {
sysConfig->write();
}
catch(...)
{
log.writeLog(__LINE__, "addModule - ERROR: sysConfig->write", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
//write Columnstore.xml Module section
try {
oam.setSystemConfig(moduleType, setmoduletypeconfig);
log.writeLog(__LINE__, "addModule - Updated Module Section of Config file", LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "addModule - ERROR: setSystemConfig", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
pthread_mutex_unlock(&THREAD_LOCK);
//check if any added modules are Active OAM
bool activeOAM = false;
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
if ( (*listPT).DeviceName == config.OAMParentName() ) {
activeOAM = true;
break;
}
}
//
//send message to Process Monitor to add module/processes to shared memory
//
if ( !activeOAM )
{
try
{
ByteStream obs;
obs << (ByteStream::byte) ADD_MODULE;
obs << (ByteStream::byte) AddModuleCount;
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
obs << (*listPT).DeviceName;
}
//pass NIC Hostnames
vector<string> nicHostNames;
listPT = devicenetworklist.begin();
HostConfig hostconfig;
for( ; listPT != devicenetworklist.end() ; listPT++)
{
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
for( ; pt1 != (*listPT).hostConfigList.end() ; pt1++)
{
nicHostNames.push_back((*pt1).HostName);
}
}
obs << (ByteStream::byte) nicHostNames.size();
vector<string>::iterator pt2 = nicHostNames.begin();
for( ; pt2 != nicHostNames.end() ; pt2++)
{
obs << *pt2;
}
sendStatusUpdate(obs, ADD_MODULE);
log.writeLog(__LINE__, "addModule - Updated Shared Memory", LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "addModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR);
return API_FAILURE;
}
}
//PMwithUM config
string PMwithUM = "n";
try {
oam.getSystemConfig( "PMwithUM", PMwithUM);
}
catch(...) {
PMwithUM = "n";
}
//check mysql port changes
string MySQLPort;
try {
oam.getSystemConfig( "MySQLPort", MySQLPort);
}
catch(...)
{}
if ( MySQLPort.empty() || MySQLPort == "" || MySQLPort == oam::UnassignedName )
MySQLPort = "3306";
string version = systemsoftware.Version + "-" + systemsoftware.Release;
//setup and push custom OS files
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string remoteModuleName = (*listPT).DeviceName;
string remoteModuleType = remoteModuleName.substr(0,MAX_MODULE_TYPE_SIZE);
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
string remoteModuleIP = (*pt1).IPAddr;
string remoteHostName = (*pt1).HostName;
//create and copy custom OS
//run remote installer script
string dir = installDir + "/local/etc/" + remoteModuleName;
string cmd = "mkdir " + dir + " > /dev/null 2>&1";
system(cmd.c_str());
if ( remoteModuleType == "um" ) {
cmd = "cp " + installDir + "/local/etc/um1/* " + dir + "/.";
system(cmd.c_str());
}
else
{
if ( remoteModuleType == "pm") {
cmd = "cp " + installDir + "/local/etc/pm1/* " + dir + "/.";
system(cmd.c_str());
}
}
log.writeLog(__LINE__, "addModule - created directory and custom OS files for " + remoteModuleName, LOG_TYPE_DEBUG);
//create module file
if( !createModuleFile(remoteModuleName) ) {
log.writeLog(__LINE__, "addModule - ERROR: createModuleFile failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
log.writeLog(__LINE__, "addModule - create module file for " + remoteModuleName, LOG_TYPE_DEBUG);
if ( remoteModuleType == "pm" ) {
//setup Standby OAM Parent, if needed
if ( config.OAMStandbyName() == oam::UnassignedName )
setStandbyModule(remoteModuleName, false);
}
//set root password
if (amazon) {
cmd = startup::StartUp::installDir() + "/bin/remote_command.sh " + remoteModuleIP + " " + password + " '/root/.scripts/updatePassword.sh " + password + "' > /tmp/password_change.log";
log.writeLog(__LINE__, "addModule - cmd: " + cmd, LOG_TYPE_DEBUG);
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) == 0)
log.writeLog(__LINE__, "addModule - update root password: " + remoteModuleName, LOG_TYPE_DEBUG);
else
log.writeLog(__LINE__, "addModule - ERROR: update root password: " + remoteModuleName, LOG_TYPE_DEBUG);
}
//default
string binaryInstallDir = installDir;
//run installer on remote module
if ( remoteModuleType == "um" ||
( remoteModuleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) ||
( remoteModuleType == "pm" && PMwithUM == "y" ) ) {
//run remote installer script
if ( packageType != "binary" ) {
log.writeLog(__LINE__, "addModule - user_installer run for " + remoteModuleName, LOG_TYPE_DEBUG);
string cmd = installDir + "/bin/user_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + version + " initial " + packageType + " --nodeps none " + MySQLPort + " 1 > /tmp/user_installer.log";
log.writeLog(__LINE__, "addModule cmd: " + cmd, LOG_TYPE_DEBUG);
bool passed = false;
for ( int retry = 0 ; retry < 20 ; retry++ )
{
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 0) {
// if log file size is zero, retry
ifstream in("/tmp/user_installer.log");
in.seekg(0, std::ios::end);
int size = in.tellg();
if ( size == 0 )
{
log.writeLog(__LINE__, "addModule - ERROR: user_installer.sh failed, retry", LOG_TYPE_DEBUG);
sleep(5);
continue;
}
else
break;
}
else
{
passed = true;
break;
}
}
if ( !passed )
{
log.writeLog(__LINE__, "addModule - ERROR: user_installer.sh failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
system("/bin/cp -f /tmp/user_installer.log /tmp/user_installer.log.failed");
processManager.setModuleState(remoteModuleName, oam::FAILED);
return API_FAILURE;
}
}
else
{ // do a binary package install
log.writeLog(__LINE__, "addModule - binary_installer run for " + remoteModuleName, LOG_TYPE_DEBUG);
string binservertype = oam.itoa(config.ServerInstallType());
if ( PMwithUM == "y" )
binservertype = "pmwithum";
string cmd = installDir + "/bin/binary_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + calpontPackage + " " + remoteModuleType + " initial " + binservertype + " " + MySQLPort + " 1 " + binaryInstallDir + " > /tmp/binary_installer.log";
log.writeLog(__LINE__, "addModule - " + cmd, LOG_TYPE_DEBUG);
bool passed = false;
for ( int retry = 0 ; retry < 20 ; retry++ )
{
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 0) {
// if log file size is zero, retry
ifstream in("/tmp/binary_installer.log");
in.seekg(0, std::ios::end);
int size = in.tellg();
if ( size == 0 )
{
log.writeLog(__LINE__, "addModule - ERROR: binary_installer.sh failed, retry", LOG_TYPE_DEBUG);
sleep(5);
continue;
}
else
break;
}
else
{
passed = true;
break;
}
}
if ( !passed )
{
log.writeLog(__LINE__, "addModule - ERROR: binary_installer.sh failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
system("/bin/cp -f /tmp/binary_installer.log /tmp/binary_installer.log.failed");
processManager.setModuleState(remoteModuleName, oam::FAILED);
return API_FAILURE;
}
}
}
else
{
if ( remoteModuleType == "pm" ) {
if ( packageType != "binary" ) {
log.writeLog(__LINE__, "addModule - performance_installer run for " + remoteModuleName, LOG_TYPE_DEBUG);
string cmd = installDir + "/bin/performance_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + version + " initial " + packageType + + " --nodeps 1 > /tmp/performance_installer.log";
log.writeLog(__LINE__, "addModule cmd: " + cmd, LOG_TYPE_DEBUG);
rtnCode = system(cmd.c_str());
bool passed = false;
for ( int retry = 0 ; retry < 20 ; retry++ )
{
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 0) {
// if log file size is zero, retry
ifstream in("/tmp/performance_installer.log");
in.seekg(0, std::ios::end);
int size = in.tellg();
if ( size == 0 )
{
log.writeLog(__LINE__, "addModule - ERROR: performance_installer.sh failed, retry", LOG_TYPE_DEBUG);
sleep(5);
continue;
}
else
break;
}
else
{
passed = true;
break;
}
}
if ( !passed )
{
log.writeLog(__LINE__, "addModule - ERROR: performance_installer.sh failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
system("/bin/cp -f /tmp/performance_installer.log /tmp/performance_installer.log.failed");
processManager.setModuleState(remoteModuleName, oam::FAILED);
return API_FAILURE;
}
}
else
{ // do a binary package install
log.writeLog(__LINE__, "addModule - binary_installer run for " + remoteModuleName, LOG_TYPE_DEBUG);
string binservertype = oam.itoa(config.ServerInstallType());
if ( PMwithUM == "y" )
binservertype = "pmwithum";
string cmd = installDir + "/bin/binary_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + calpontPackage + " " + remoteModuleType + " initial " + binservertype + " " + MySQLPort + " 1 " + binaryInstallDir + " > /tmp/binary_installer.log";
log.writeLog(__LINE__, "addModule - " + cmd, LOG_TYPE_DEBUG);
bool passed = false;
for ( int retry = 0 ; retry < 20 ; retry++ )
{
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 0) {
// if log file size is zero, retry
ifstream in("/tmp/binary_installer.log");
in.seekg(0, std::ios::end);
int size = in.tellg();
if ( size == 0 )
{
log.writeLog(__LINE__, "addModule - ERROR: binary_installer.sh failed, retry", LOG_TYPE_DEBUG);
sleep(5);
continue;
}
else
break;
}
else
{
passed = true;
break;
}
}
if ( !passed )
{
log.writeLog(__LINE__, "addModule - ERROR: binary_installer.sh failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
system("/bin/cp -f /tmp/binary_installer.log /tmp/binary_installer.log.failed");
processManager.setModuleState(remoteModuleName, oam::FAILED);
return API_FAILURE;
}
}
}
}
}
//Start new modules by starting up local Process-Monitor
listPT = devicenetworklist.begin();
for( ; listPT != devicenetworklist.end() ; listPT++)
{
string remoteModuleName = (*listPT).DeviceName;
if (manualFlag)
//set new module to disable state if manual add
disableModule(remoteModuleName, true);
HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
string remoteModuleIP = (*pt1).IPAddr;
string remoteHostName = (*pt1).HostName;
//send start service commands
string cmd = installDir + "/bin/remote_command.sh " + remoteModuleIP + " " + password + " '" + installDir + "/bin/columnstore restart;" + installDir + "/mysql/mysqld-Columnstore restart' 0";
system(cmd.c_str());
log.writeLog(__LINE__, "addModule - restart columnstore service " + remoteModuleName, LOG_TYPE_DEBUG);
// add to monitor list
moduleInfoList.insert(moduleList::value_type(remoteModuleName, 0));
if (amazon) {
//check and assign Elastic IP Address
int AmazonElasticIPCount = 0;
try{
oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount);
}
catch(...) {
AmazonElasticIPCount = 0;
}
for ( int id = 1 ; id < AmazonElasticIPCount+1 ; id++ )
{
string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id);
string ELmoduleName;
try{
oam.getSystemConfig(AmazonElasticModule, ELmoduleName);
}
catch(...) {}
if ( ELmoduleName == remoteModuleName )
{ //match found assign Elastic IP Address
string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id);
string ELIPaddress;
try{
oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress);
}
catch(...) {}
try{
oam.assignElasticIP(remoteHostName, ELIPaddress);
log.writeLog(__LINE__, "addModule - Set Elastic IP Address: " + remoteModuleName + "/" + ELIPaddress, LOG_TYPE_DEBUG);
}
catch(...) {
log.writeLog(__LINE__, "addModule - Failed to Set Elastic IP Address: " + remoteModuleName + "/" + ELIPaddress, LOG_TYPE_ERROR);
}
break;
}
}
}
}
//if amazon, delay to give time for ProcMon to start
if (amazon) {
log.writeLog(__LINE__, "addModule - sleep 30 - give ProcMon time to start on new Instance", LOG_TYPE_DEBUG);
sleep(30);
}
//distribute config file
distributeConfigFile("system");
log.writeLog(__LINE__, "Setup MySQL Replication for new Modules being Added", LOG_TYPE_DEBUG);
processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, false, true, password );
return API_SUCCESS;
}
/******************************************************************************************
* @brief removeModule
*
* purpose: Remove Module from system configuration
*
******************************************************************************************/
/**
 * @brief Remove one or more modules of a single module type from the system configuration.
 *
 * The module type is taken from the first entry of @p devicenetworklist and is
 * applied to the whole request. THREAD_LOCK is held from the start of the
 * configuration work until the module section has been written; it is released
 * temporarily around checkSimplexModule() and is not held for the status
 * update / alarm clean-up / config distribution that follow.
 *
 * @param devicenetworklist modules to remove (DeviceName is the module name)
 * @param manualFlag        when true, each module is first stopped with the REMOVE option
 *
 * @return API_SUCCESS on success;
 *         API_INVALID_PARAMETER for an empty list, a remove count larger than the
 *         configured module count, a module that fails validation, or a request
 *         that includes the Active Parent OAM module;
 *         API_FAILURE when reading/writing the configuration or updating the
 *         Process Monitor fails.
 */
int ProcessManager::removeModule(oam::DeviceNetworkList devicenetworklist, bool manualFlag)
{
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    ModuleTypeConfig moduletypeconfig;
    ModuleTypeConfig setmoduletypeconfig;
    Oam oam;
    string Section;

    // The module type is derived from the first list entry, so an empty
    // request cannot be processed (and must not be dereferenced).
    if ( devicenetworklist.empty() ) {
        log.writeLog(__LINE__, "removeModule - ERROR: empty module list", LOG_TYPE_ERROR);
        return API_INVALID_PARAMETER;
    }

    pthread_mutex_lock(&THREAD_LOCK);

    //get module count being removed
    int RemoveModuleCount = devicenetworklist.size();

    //
    //Get System Configuration
    //
    DeviceNetworkList::iterator listPT = devicenetworklist.begin();
    string moduleType = (*listPT).DeviceName.substr(0,MAX_MODULE_TYPE_SIZE);

    try{
        oam.getSystemConfig(moduleType, moduletypeconfig);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "removeModule - ERROR: getSystemConfig", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    // working copy that receives all modifications and is written back later
    setmoduletypeconfig = moduletypeconfig;

    // get current Module Type Count and validate request
    int oldModuleCount = moduletypeconfig.ModuleCount;

    if ( oldModuleCount < RemoveModuleCount ) {
        log.writeLog(__LINE__, "removeModule - ERROR: remove count is larger than ModuleType count", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_INVALID_PARAMETER;
    }

    //validate the module list to be removed
    // The whole request is checked BEFORE anything is stopped or terminated,
    // so a rejected request leaves every module untouched.
    listPT = devicenetworklist.begin();
    for( ; listPT != devicenetworklist.end() ; listPT++)
    {
        int returnStatus = oam.validateModule((*listPT).DeviceName);
        if (returnStatus != API_SUCCESS) {
            log.writeLog(__LINE__, "removeModule - ERROR: invalid module: " + (*listPT).DeviceName, LOG_TYPE_ERROR);
            pthread_mutex_unlock(&THREAD_LOCK);
            return API_INVALID_PARAMETER;
        }

        //don't allow remove of Active PM Module
        if ( (*listPT).DeviceName == config.OAMParentName() ) {
            log.writeLog(__LINE__, "removeModule - ERROR: can't remove current module (Active Parent OAM) ", LOG_TYPE_ERROR);
            pthread_mutex_unlock(&THREAD_LOCK);
            return API_INVALID_PARAMETER;
        }
    }

    if(manualFlag)
    {
        //stopModules being removed with the REMOVE option, which will stop process
        listPT = devicenetworklist.begin();
        for( ; listPT != devicenetworklist.end() ; listPT++)
        {
            string moduleName = (*listPT).DeviceName;
            log.writeLog(__LINE__, "removeModule - stopping module: " + moduleName, LOG_TYPE_DEBUG);

            int status = stopModule(moduleName, REMOVE, true);
            if (status == API_SUCCESS) {
                log.writeLog(__LINE__, "removeModule - stopModule Successfully " + moduleName, LOG_TYPE_DEBUG);

                //check for SIMPLEX Processes on mate might need to be started
                // THREAD_LOCK is dropped for the duration of this call
                pthread_mutex_unlock(&THREAD_LOCK);
                checkSimplexModule(moduleName);
                pthread_mutex_lock(&THREAD_LOCK);
            }
            else
                // a failed stop is logged but does not abort the removal
                log.writeLog(__LINE__, "removeModule - stopModule " + moduleName, LOG_TYPE_ERROR);
        }
    }

    int newModuleCount = oldModuleCount - RemoveModuleCount;
    setmoduletypeconfig.ModuleCount = newModuleCount;

    // both values are optional; on failure they simply stay empty
    string systemName;
    string AmazonAutoTagging;
    try{
        oam.getSystemConfig("SystemName", systemName);
        oam.getSystemConfig("AmazonAutoTagging", AmazonAutoTagging);
    }
    catch(...) {}

    //Clear out Module IP and Hostnames
    listPT = devicenetworklist.begin();
    for( ; listPT != devicenetworklist.end() ; listPT++)
    {
        string moduleName = (*listPT).DeviceName;
        log.writeLog(__LINE__, "removeModule - removing module: " + moduleName, LOG_TYPE_DEBUG);

        DeviceNetworkList::iterator pt = setmoduletypeconfig.ModuleNetworkList.begin();
        for ( ; pt != setmoduletypeconfig.ModuleNetworkList.end() ; pt++)
        {
            if ( moduleName == (*pt).DeviceName ) {
                HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
                for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ )
                {
                    //if cloud, delete instance
                    if (amazon)
                    {
                        log.writeLog(__LINE__, "removeModule - terminate instance: " + (*pt1).HostName, LOG_TYPE_DEBUG);
                        oam.terminateEC2Instance( (*pt1).HostName );

                        // update instance tag
                        if ( AmazonAutoTagging == "y" )
                        {
                            string tagValue = systemName + "-" + moduleName + "-terminated";
                            oam.createEC2tag( (*pt1).HostName, "Name", tagValue );
                        }

                        //check if any volumes need to be deleted
                        if ( moduleType == "um" )
                        {
                            string UMStorageType = "internal";
                            try{
                                oam.getSystemConfig("UMStorageType", UMStorageType);
                            }
                            catch(...) {}

                            if ( UMStorageType == "external" )
                            {
                                //look up the volume / device configured for this UM
                                int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE).c_str());

                                string volumeNameID = "UMVolumeName" + oam.itoa(moduleID);
                                string volumeName = oam::UnassignedName;
                                string deviceNameID = "UMVolumeDeviceName" + oam.itoa(moduleID);
                                string deviceName = oam::UnassignedName;
                                try {
                                    oam.getSystemConfig( volumeNameID, volumeName);
                                    oam.getSystemConfig( deviceNameID, deviceName);
                                }
                                catch(...)
                                {}

                                // Only touch a volume that is really assigned. The
                                // previous '||' made this test always true, so an
                                // empty / unassigned name was detached and deleted too.
                                if ( !volumeName.empty() && volumeName != oam::UnassignedName ) {
                                    log.writeLog(__LINE__, "removeModule - detach / remove volume: " + volumeName + "/" + deviceName, LOG_TYPE_DEBUG);
                                    oam.detachEC2Volume( volumeName );
                                    oam.deleteEC2Volume( volumeName );

                                    // best effort: failure to clear the entries is ignored
                                    try {
                                        Config* volumeConfig = Config::makeConfig();
                                        volumeConfig->setConfig("Installation", volumeNameID, oam::UnassignedName);
                                        volumeConfig->setConfig("Installation", deviceNameID, oam::UnassignedName);
                                        volumeConfig->write();
                                    }
                                    catch(...)
                                    {}
                                }
                            }
                        }
                    }

                    clearNICAlarms((*pt1).HostName);

                    (*pt1).IPAddr = oam::UnassignedIpAddr;
                    (*pt1).HostName = oam::UnassignedName;
                }
                break;
            }
        }
    }

    //Remove Process Ports
    // all nodes: ProcessMonitor, ServerMonitor
    // dm: NONE
    // um: ExeMgr
    // pm: NONE
    Config* sysConfig = Config::makeConfig();

    listPT = devicenetworklist.begin();
    for( ; listPT != devicenetworklist.end() ; listPT++)
    {
        Section = (*listPT).DeviceName + "_ProcessMonitor";
        sysConfig->setConfig(Section, "IPAddr", oam::UnassignedName);

        Section = (*listPT).DeviceName + "_ServerMonitor";
        sysConfig->setConfig(Section, "IPAddr", oam::UnassignedName);
    }

    if ( moduleType == "um" ||
        ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) ||
        ( moduleType == "um" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM ) ||
        ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_PM_UM ) ||
        ( moduleType == "pm" && PMwithUM == "y" ) ) {

        listPT = devicenetworklist.begin();
        for( ; listPT != devicenetworklist.end() ; listPT++)
        {
            // go find ExeMgr ID by moduleName
            // The scan ends at the first ExeMgr<id> section whose "Module"
            // value is empty (or whose lookup throws).
            for ( int id = 1 ; ; id++ )
            {
                string exeMgrSection = "ExeMgr" + oam.itoa(id);
                string exeMgrModule;

                try {
                    Config* exeMgrConfig = Config::makeConfig();
                    exeMgrModule = exeMgrConfig->getConfig(exeMgrSection, "Module");

                    if ( exeMgrModule == (*listPT).DeviceName )
                    { // match
                        exeMgrConfig->setConfig(exeMgrSection, "IPAddr", oam::UnassignedName);
                        exeMgrConfig->setConfig(exeMgrSection, "Module", oam::UnassignedName);
                        break;
                    }
                }
                catch (...) {}

                if ( exeMgrModule.empty() )
                    break;
            }
        }
    }

    log.writeLog(__LINE__, "removeModule - Updated Process Ports", LOG_TYPE_DEBUG);

    //unassign dbroot entries
    if (moduleType == "pm")
    {
        const string MODULE_DBROOTID = "ModuleDBRootID";
        const string MODULE_DBROOT_COUNT = "ModuleDBRootCount";

        listPT = devicenetworklist.begin();
        for( ; listPT != devicenetworklist.end() ; listPT++)
        {
            string moduleID = (*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE);

            string ModuleDBRootCount = MODULE_DBROOT_COUNT + moduleID + "-3";
            sysConfig->setConfig("SystemModuleConfig", ModuleDBRootCount, oam::UnassignedName);

            string ModuleDBrootID = MODULE_DBROOTID + moduleID + "-1-3";
            sysConfig->setConfig("SystemModuleConfig", ModuleDBrootID, oam::UnassignedName);
        }
    }

    log.writeLog(__LINE__, "removeModule - Updated DBRoot paramaters", LOG_TYPE_DEBUG);

    //update Columnstore Config table
    try {
        sysConfig->write();
    }
    catch(...)
    {
        log.writeLog(__LINE__, "removeModule - ERROR: sysConfig->write", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    //write Columnstore.xml Module section
    try {
        oam.setSystemConfig(moduleType, setmoduletypeconfig);
        log.writeLog(__LINE__, "removeModule - Updated Module Section of Config file", LOG_TYPE_DEBUG);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "removeModule - ERROR: setSystemConfig", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    // From here on THREAD_LOCK is no longer held; later error returns must not unlock it.
    pthread_mutex_unlock(&THREAD_LOCK);

    //check if any removed modules was Standby OAM or Active OAM
    bool activeOAM = false;
    listPT = devicenetworklist.begin();
    for( ; listPT != devicenetworklist.end() ; listPT++)
    {
        if ( (*listPT).DeviceName == config.OAMStandbyName() )
            clearStandbyModule();
        else if ( (*listPT).DeviceName == config.OAMParentName() )
            activeOAM = true;
    }

    //
    //send message to Process Monitor to remove module/processes to shared memory
    //
    if ( !activeOAM )
    {
        try
        {
            ByteStream obs;

            obs << (ByteStream::byte) REMOVE_MODULE;
            obs << (ByteStream::byte) RemoveModuleCount;

            listPT = devicenetworklist.begin();
            for( ; listPT != devicenetworklist.end() ; listPT++)
            {
                obs << (*listPT).DeviceName;
            }

            sendStatusUpdate(obs, REMOVE_MODULE);

            log.writeLog(__LINE__, "removeModule - Updated Shared Memory", LOG_TYPE_DEBUG);
        }
        catch(...)
        {
            log.writeLog(__LINE__, "removeModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR);
            return API_FAILURE;
        }
    }

    if ( moduleType == "pm" ) {
        if ( updatePMSconfig() != API_SUCCESS )
            return API_FAILURE;
    }

    //Update DBRM section of Columnstore.xml
    if ( updateWorkerNodeconfig() != API_SUCCESS )
        return API_FAILURE;

    // remove all associated alarms for this modules being removed
    listPT = devicenetworklist.begin();
    for( ; listPT != devicenetworklist.end() ; listPT++)
    {
        clearModuleAlarms( (*listPT).DeviceName );
        log.writeLog(__LINE__, "removeModule - successfully removed module: " + (*listPT).DeviceName, LOG_TYPE_DEBUG);
    }

    //distribute config file
    distributeConfigFile("system");

    return API_SUCCESS;
}
/******************************************************************************************
* @brief reconfigureModule
*
* purpose: Reconfigure Module in system configuration
*
******************************************************************************************/
/**
 * @brief Reconfigure an existing module as a different module (e.g. PM to UM).
 *
 * @param devicenetworklist must hold at least two entries:
 *        - entry 0: DeviceName of the module being reconfigured;
 *        - entry 1: DeviceName the module is reconfigured to; if its
 *          hostConfigList is not empty, the first host entry is added as an
 *          extra (secondary) NIC of the reconfigured module.
 *
 * The module is stopped, its host entries are copied to the new module name
 * in the new module type's configuration, the module's Process Monitor is
 * told to reconfigure, and finally the old module's entries are cleared.
 * THREAD_LOCK is held while the configuration is modified (released
 * temporarily around checkSimplexModule()) and is released before the
 * shared-memory status updates are sent.
 *
 * @return API_SUCCESS on success, API_INVALID_PARAMETER when fewer than two
 *         list entries are supplied, API_FAILURE on any configuration /
 *         messaging error.
 */
int ProcessManager::reconfigureModule(oam::DeviceNetworkList devicenetworklist)
{
    ModuleTypeConfig reconfiguremoduletypeconfig;
    ModuleTypeConfig setreconfiguremoduletypeconfig;
    ModuleTypeConfig moduletypeconfig;
    DeviceNetworkConfig devicenetworkconfig;
    Oam oam;
    string Section;

    // Both the current and the new module entry are dereferenced below;
    // reject short lists instead of reading past the end.
    if ( devicenetworklist.size() < 2 ) {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: module list must contain the current and the new module", LOG_TYPE_ERROR);
        return API_INVALID_PARAMETER;
    }

    pthread_mutex_lock(&THREAD_LOCK);

    DeviceNetworkList::iterator listPT = devicenetworklist.begin();

    //get module name being reconfigured
    string moduleName = (*listPT).DeviceName;
    string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);

    //get module type being configured as
    listPT++;
    string reconfigureModuleName = (*listPT).DeviceName;
    string reconfigureModuleType = reconfigureModuleName.substr(0,MAX_MODULE_TYPE_SIZE);

    // optional secondary NIC passed along with the new module entry
    string reconfigureHostName2;
    string reconfigureIpAddr2;
    int reconfigureNicId2 = 0;
    if ( !(*listPT).hostConfigList.empty()) {
        HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
        reconfigureHostName2 = (*pt1).HostName;
        reconfigureIpAddr2 = (*pt1).IPAddr;
        reconfigureNicId2 = (*pt1).NicID;
    }

    int status = stopModule(moduleName, GRACEFUL, true);
    if (status == API_SUCCESS) {
        log.writeLog(__LINE__, "reconfigureModule - stopModule Successfully " + moduleName, LOG_TYPE_DEBUG);

        //check for SIMPLEX Processes on mate might need to be started
        // THREAD_LOCK is dropped for the duration of this call
        pthread_mutex_unlock(&THREAD_LOCK);
        checkSimplexModule(moduleName);
        pthread_mutex_lock(&THREAD_LOCK);
    }
    else
        // a failed stop is logged but does not abort the reconfigure
        log.writeLog(__LINE__, "reconfigureModule - stopModule " + moduleName, LOG_TYPE_ERROR);

    //
    //Get Module Configuration
    //
    try{
        oam.getSystemConfig(moduleType, moduletypeconfig);
        oam.getSystemConfig(reconfigureModuleType, reconfiguremoduletypeconfig);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: getSystemConfig", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    setreconfiguremoduletypeconfig = reconfiguremoduletypeconfig;

    // update Module Type Counts
    setreconfiguremoduletypeconfig.ModuleCount++;

    Config* sysConfig = Config::makeConfig();

    //Move Module IP and Hostnames
    string IPaddress = oam::UnassignedIpAddr;
    HostConfig hostconfig;

    DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin();
    for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++)
    {
        if ( moduleName == (*pt).DeviceName ) {
            devicenetworkconfig.DeviceName = reconfigureModuleName;

            HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
            for( ; pt1 != (*pt).hostConfigList.end() ; pt1++)
            {
                if ( pt1 == (*pt).hostConfigList.begin() )
                    //save first IP for Process Port usage
                    IPaddress = (*pt1).IPAddr;

                hostconfig.IPAddr = (*pt1).IPAddr;
                hostconfig.HostName = (*pt1).HostName;
                hostconfig.NicID = (*pt1).NicID;
                devicenetworkconfig.hostConfigList.push_back(hostconfig);
            }

            //configure any secondary NIC info passed from console
            if ( ! reconfigureHostName2.empty() ) {
                hostconfig.IPAddr = reconfigureIpAddr2;
                hostconfig.HostName = reconfigureHostName2;
                hostconfig.NicID = reconfigureNicId2;
                devicenetworkconfig.hostConfigList.push_back(hostconfig);
            }

            setreconfiguremoduletypeconfig.ModuleNetworkList.push_back(devicenetworkconfig);
            break;
        }
    }

    // Still unassigned means the module was not found or has no usable first IP
    if ( IPaddress == oam::UnassignedIpAddr ) {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: module IP is unassigned", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    //Update Process Ports
    // all nodes: ProcessMonitor, ServerMonitor
    // dm: NONE
    // um: ExeMgr
    // pm: NONE
    Section = reconfigureModuleName + "_ProcessMonitor";
    sysConfig->setConfig(Section, "IPAddr", IPaddress);
    sysConfig->setConfig(Section, "Port", "8800");

    Section = reconfigureModuleName + "_ServerMonitor";
    sysConfig->setConfig(Section, "IPAddr", IPaddress);
    sysConfig->setConfig(Section, "Port", "8622");

    if ( moduleType == "um" ||
        ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) ||
        ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_PM_UM ) ) {
        // old module carried an ExeMgr entry: unassign it
        int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE).c_str());
        Section = "ExeMgr" + oam.itoa(moduleID);
        sysConfig->setConfig(Section, "IPAddr", oam::UnassignedIpAddr);
    }
    else
    {
        //PM TO UM
        int moduleID = atoi(reconfigureModuleName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE).c_str());
        Section = "ExeMgr" + oam.itoa(moduleID);
        sysConfig->setConfig(Section, "IPAddr", IPaddress);
        sysConfig->setConfig(Section, "Port", "8601");
    }

    log.writeLog(__LINE__, "reconfigureModule - Updated Process Ports", LOG_TYPE_DEBUG);

    //update Columnstore Config table
    try {
        sysConfig->write();
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: sysConfig->write", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    //write Columnstore.xml Module section
    try {
        oam.setSystemConfig(reconfigureModuleType, setreconfiguremoduletypeconfig);
        log.writeLog(__LINE__, "reconfigureModule - Updated Module Section of Config file", LOG_TYPE_DEBUG);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: setSystemConfig", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    //distribute config file
    distributeConfigFile(moduleName);

    //
    //Send Reconfigure msg to Module's Process-Monitor being reconfigured
    //
    ByteStream msg;
    ByteStream::byte requestID = RECONFIGURE;

    msg << requestID;
    msg << reconfigureModuleName;

    int returnStatus = sendMsgProcMon( moduleName, msg, requestID );

    if ( returnStatus == API_SUCCESS)
        //log the event
        log.writeLog(__LINE__, "reconfigureModule - procmon reconfigure successful", LOG_TYPE_DEBUG);
    else
    {
        log.writeLog(__LINE__, "reconfigureModule - procmon reconfigure failed", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    // Re-read the old module type's configuration and clear the old module's entries
    ModuleTypeConfig setmoduletypeconfig;
    try{
        oam.getSystemConfig(moduleType, setmoduletypeconfig);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: getSystemConfig", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    // update Module Type Counts
    setmoduletypeconfig.ModuleCount--;

    //Clear Module IP and Hostnames
    pt = setmoduletypeconfig.ModuleNetworkList.begin();
    for ( ; pt != setmoduletypeconfig.ModuleNetworkList.end() ; pt++)
    {
        if ( moduleName == (*pt).DeviceName ) {
            HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
            for( ; pt1 != (*pt).hostConfigList.end() ; pt1++)
            {
                (*pt1).IPAddr = oam::UnassignedIpAddr;
                (*pt1).HostName = oam::UnassignedName;
            }
            break;
        }
    }

    //Update Process Ports
    // all nodes: ProcessMonitor, ServerMonitor
    // dm: NONE
    // um: ExeMgr
    // pm: NONE
    Section = moduleName + "_ProcessMonitor";
    sysConfig->setConfig(Section, "IPAddr", oam::UnassignedIpAddr);

    Section = moduleName + "_ServerMonitor";
    sysConfig->setConfig(Section, "IPAddr", oam::UnassignedIpAddr);

    log.writeLog(__LINE__, "reconfigureModule - Updated Process Ports", LOG_TYPE_DEBUG);

    //update Columnstore Config table
    try {
        sysConfig->write();
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: sysConfig->write", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    //write Columnstore.xml Module section
    try {
        oam.setSystemConfig(moduleType, setmoduletypeconfig);
        log.writeLog(__LINE__, "reconfigureModule - Updated Module Section of Config file", LOG_TYPE_DEBUG);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: setSystemConfig", LOG_TYPE_ERROR);
        pthread_mutex_unlock(&THREAD_LOCK);
        return API_FAILURE;
    }

    // From here on THREAD_LOCK is no longer held; later error returns must not unlock it.
    pthread_mutex_unlock(&THREAD_LOCK);

    //
    //send message to Process Monitor to remove/add module/processes to shared memory
    //
    try
    {
        ByteStream obs;

        obs << (ByteStream::byte) REMOVE_MODULE;
        obs << (ByteStream::byte) 1;
        obs << moduleName;

        sendStatusUpdate(obs, REMOVE_MODULE);

        log.writeLog(__LINE__, "reconfigureModule - module removed from Shared Memory", LOG_TYPE_DEBUG);
    }
    catch(...)
    {
        // The lock was already released above; unlocking it again here
        // (as the code used to) would be a double unlock.
        log.writeLog(__LINE__, "reconfigureModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR);
        return API_FAILURE;
    }

    try
    {
        ByteStream obs;

        obs << (ByteStream::byte) ADD_MODULE;
        obs << (ByteStream::byte) 1;
        obs << reconfigureModuleName;

        //pass NIC Hostnames
        if ( ! reconfigureHostName2.empty() ) {
            obs << (ByteStream::byte) 1;
            // send the secondary NIC host name explicitly rather than relying
            // on whatever was last stored in the scratch 'hostconfig'
            obs << reconfigureHostName2;
        }
        else
            obs << (ByteStream::byte) 0;

        sendStatusUpdate(obs, ADD_MODULE);

        log.writeLog(__LINE__, "reconfigureModule - module added from Shared Memory", LOG_TYPE_DEBUG);
    }
    catch(...)
    {
        log.writeLog(__LINE__, "reconfigureModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR);
        return API_FAILURE;
    }

    if ( moduleType == "pm" ) {
        if ( updatePMSconfig() != API_SUCCESS )
            return API_FAILURE;
    }

    //Update DBRM section of Columnstore.xml
    if ( updateWorkerNodeconfig() != API_SUCCESS )
        return API_FAILURE;

    // remove all associated alarms for this modules being removed
    clearModuleAlarms( moduleName );

    //distribute config file
    distributeConfigFile("system");

    return API_SUCCESS;
}
/******************************************************************************************
* @brief sendMsgProcMon
*
* purpose: Sends a Msg to ProcMon
*
******************************************************************************************/
int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requestID, int timeout )
{
string msgPort;
int returnStatus = API_MINOR_FAILURE;
Oam oam;
if ( module != config.moduleName() ) {
msgPort = module + "_ProcessMonitor";
// do a ping test to determine a quick failure
Config* sysConfig = Config::makeConfig();
string IPAddr = sysConfig->getConfig(msgPort, "IPAddr");
if ( IPAddr == oam::UnassignedIpAddr ) {
log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR);
return oam::API_SUCCESS;
}
string cmdLine = "ping ";
string cmdOption = " -c 1 -w 5 >> /dev/null";
string cmd = cmdLine + IPAddr + cmdOption;
if ( system(cmd.c_str()) != 0) {
//ping failure
log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR);
return oam::API_SUCCESS;
}
}
else
// use the localhost IP Address
msgPort = "localhost_ProcessMonitor";
log.writeLog(__LINE__, "sendMsgProcMon: Process module " + module , LOG_TYPE_DEBUG);
try
{
MessageQueueClient mqRequest(msgPort);
mqRequest.write(msg);
if ( timeout > 0 ) {
// wait for response
ByteStream::byte returnACK;
ByteStream::byte returnRequestID;
ByteStream::byte requestStatus;
ByteStream receivedMSG;
struct timespec ts = { timeout, 0 };
// get current time in seconds
time_t startTimeSec;
time (&startTimeSec);
while(true)
{
try {
receivedMSG = mqRequest.read(&ts);
}
catch (SocketClosed &ex) {
string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read, module " + module + " : " + error, LOG_TYPE_ERROR);
return returnStatus;
}
catch (...) {
// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception! module " + module, LOG_TYPE_ERROR);
return returnStatus;
}
if (receivedMSG.length() > 0) {
receivedMSG >> returnACK;
receivedMSG >> returnRequestID;
receivedMSG >> requestStatus;
if ( requestID == oam::MASTERREP )
{
receivedMSG >> masterLogFile;
receivedMSG >> masterLogPos;
}
if ( returnACK == oam::ACK && returnRequestID == requestID) {
// ACK for this request
returnStatus = requestStatus;
break;
}
else
log.writeLog(__LINE__, "sendMsgProcMon: invalid message " + module, LOG_TYPE_ERROR);
}
else
{ //api timeout occurred, check if retry should be done
// get current time in seconds
time_t endTimeSec;
time (&endTimeSec);
if ( timeout <= (endTimeSec - startTimeSec) ) {
log.writeLog(__LINE__, "sendMsgProcMon: ProcMon Msg timeout on module " + module, LOG_TYPE_ERROR);
break;
}
}
}
}
else
returnStatus = oam::API_SUCCESS;
mqRequest.shutdown();
}
catch (exception& ex)
{
string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR);
}
return returnStatus;
}
/******************************************************************************************
* @brief sendMsgProcMon1
*
* purpose: Sends a Msg to ProcMon
*
******************************************************************************************/
/**
 * Send a request to a module's Process-Monitor and return the string status
 * carried in its reply.
 *
 * @param module     name of the target module; when it equals the local module
 *                   name the "localhost_ProcessMonitor" port is used
 * @param msg        request message written to the Process-Monitor
 * @param requestID  request id the reply must echo back to be accepted
 * @return the status string from a matching ACK, or "FAILED" on ping failure,
 *         read error, timeout (30 seconds), mismatched reply or any exception
 */
std::string ProcessManager::sendMsgProcMon1( std::string module, ByteStream msg, int requestID )
{
    string msgPort;
    // returned unchanged on every path that does not produce a matching ACK
    string returnStatus = "FAILED";

    if ( module != config.moduleName() ) {
        msgPort = module + "_ProcessMonitor";

        // do a ping test to determine a quick failure
        Config* sysConfig = Config::makeConfig();
        string IPAddr = sysConfig->getConfig(msgPort, "IPAddr");
        string cmd = "ping " + IPAddr + " -c 1 -w 5 >> /dev/null";

        if ( system(cmd.c_str()) != 0 ) {
            //ping failure
            log.writeLog(__LINE__, "sendMsgProcMon1: ping failure on module " + module, LOG_TYPE_ERROR);
            return returnStatus;
        }
    }
    else
        // use the localhost IP Address
        msgPort = "localhost_ProcessMonitor";

    try
    {
        MessageQueueClient mqRequest(msgPort);
        mqRequest.write(msg);

        // wait 30 seconds for response
        ByteStream::byte returnACK;
        ByteStream::byte returnRequestID;
        string requestStatus;
        ByteStream receivedMSG;
        struct timespec ts = { 30, 0 };

        try {
            receivedMSG = mqRequest.read(&ts);
        }
        catch (...) {
            // closed socket or any other read error: report failure to the caller
            return returnStatus;
        }

        if (receivedMSG.length() > 0) {
            receivedMSG >> returnACK;
            receivedMSG >> returnRequestID;
            receivedMSG >> requestStatus;

            if ( returnACK == oam::ACK && returnRequestID == requestID) {
                // ACK for this request
                returnStatus = requestStatus;
            }
            else
                // same diagnostic the sibling sendMsgProcMon emits for a stray reply
                log.writeLog(__LINE__, "sendMsgProcMon1: invalid message " + module, LOG_TYPE_ERROR);
        }
        else
            // an empty message is treated as a timeout
            log.writeLog(__LINE__, "sendMsgProcMon1: ProcMon Msg timeout on module " + module, LOG_TYPE_ERROR);

        mqRequest.shutdown();
    }
    catch (...)
    {
        // connection / write / parse failure: fall through and return "FAILED"
    }

    return returnStatus;
}
/******************************************************************************************
* @brief saveBRM
*
* purpose: Execute the reset_locks then save BRM data script
*
******************************************************************************************/
void ProcessManager::saveBRM(bool skipSession, bool clearshm)
{
Oam oam;
string logdir("/var/log/mariadb/columnstore");
if (access(logdir.c_str(), W_OK) != 0) logdir = "/tmp";
log.writeLog(__LINE__, "Running reset_locks", LOG_TYPE_DEBUG);
string skip = " ";
if ( skipSession )
skip = "-s";
string cmd = startup::StartUp::installDir() + "/bin/reset_locks " + skip + " > " + logdir + "/reset_locks.log1 2>&1";
int rtnCode = system(cmd.c_str());
log.writeLog(__LINE__, "Ran reset_locks", LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Running DBRM save_brm", LOG_TYPE_DEBUG);
cmd = startup::StartUp::installDir() + "/bin/save_brm > " + logdir + "/save_brm.log1 2>&1";
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 1) {
log.writeLog(__LINE__, "Successfully ran DBRM save_brm", LOG_TYPE_DEBUG);
}
else
log.writeLog(__LINE__, "Error running DBRM save_brm", LOG_TYPE_ERROR);
if ( clearshm )
{
cmd = startup::StartUp::installDir() + "/bin/clearShm -c > /dev/null 2>&1";
rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) != 1) {
log.writeLog(__LINE__, "Successfully ran DBRM clearShm", LOG_TYPE_DEBUG);
}
else
log.writeLog(__LINE__, "Error running DBRM clearShm", LOG_TYPE_ERROR);
}
}
/******************************************************************************************
* @brief setQuerySystemState
*
* purpose: Set the DBRM "system query ready" flag to the given value
*
******************************************************************************************/
void ProcessManager::setQuerySystemState(bool set)
{
Oam oam;
BRM::DBRM dbrm;
log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG);
try {
dbrm.setSystemQueryReady(set);
log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_ERROR);
}
}
/******************************************************************************************
* @brief createModuleFile
*
* purpose: Create a module file for remote server
*
******************************************************************************************/
/**
 * (Re)create <installDir>/local/etc/<remoteModuleName>/module so that it
 * contains the module name followed by a newline.
 *
 * The file is written through the output stream directly rather than through
 * a shell "echo", so the module name is never interpreted by a shell.
 *
 * @param remoteModuleName  module name; used both as the directory component
 *                          and as the file content
 * @return true when the file was written, false when it could not be opened
 *         or written (the failure is logged)
 */
bool ProcessManager::createModuleFile(string remoteModuleName)
{
    string fileName = startup::StartUp::installDir() + "/local/etc/" + remoteModuleName + "/module";

    // remove any previous copy; a missing file is not an error here
    unlink (fileName.c_str());

    ofstream newFile (fileName.c_str());
    if ( !newFile ) {
        log.writeLog(__LINE__, "createModuleFile: failed to open " + fileName, LOG_TYPE_ERROR);
        return false;
    }

    newFile << remoteModuleName << "\n";
    newFile.close();

    // close() flushes the stream; any write error is visible in its state afterwards
    if ( newFile.fail() ) {
        log.writeLog(__LINE__, "createModuleFile: failed to write " + fileName, LOG_TYPE_ERROR);
        return false;
    }

    return true;
}
/*****************************************************************************************
* @brief startSystemThread
*
* purpose: Send Messages to Module Process Monitors to start Processes
*
*****************************************************************************************/
/**
 * Thread body that starts the system: validates the DBRoot assignments,
 * launches one startModuleThread per module, waits for the modules to come
 * up, then waits for DMLProc and builds the system catalog.
 *
 * The function never returns normally: every path detaches the calling thread
 * and ends in pthread_exit(). The result is published through the file-level
 * variables startsystemthreadStatus and startsystemthreadRunning.
 *
 * @param Devicenetworklist  modules to start; when empty, every module found
 *                           in the system configuration is started
 *
 * NOTE(review): startModuleThread is declared as taking a string by value but
 * is launched through a cast to void*(*)(void*) with the address of a local
 * string. The sleeps that follow each pthread_create appear to be what keeps
 * that local alive until the new thread has copied it - confirm before
 * changing the timing or the scoping of those locals.
 */
void startSystemThread(oam::DeviceNetworkList Devicenetworklist)
{
    oam::DeviceNetworkList devicenetworklist = Devicenetworklist;
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;
    SystemModuleTypeConfig systemmoduletypeconfig;
    ALARMManager aManager;
    int status = API_SUCCESS;
    bool exitThread = false;
    int exitThreadStatus = oam::API_SUCCESS;

    pthread_t ThreadId;
    ThreadId = pthread_self();

    log.writeLog(__LINE__, "startSystemThread launched", LOG_TYPE_DEBUG);

    // get system status and exit thread if in AUTO_INIT OR MAN_INIT
    SystemStatus systemstatus;
    try {
        oam.getSystemStatus(systemstatus);

        if (systemstatus.SystemOpState == AUTO_INIT ||
            systemstatus.SystemOpState == MAN_INIT) {
            log.writeLog(__LINE__, "Start already in-progess, exit startSystemThread", LOG_TYPE_DEBUG);
            startsystemthreadStatus = oam::API_ALREADY_IN_PROGRESS;
            exitThread = true;
            exitThreadStatus = oam::API_ALREADY_IN_PROGRESS;
        }
    }
    catch (...)
    {
        // system status could not be read: report failure and mark the system offline
        startsystemthreadStatus = oam::API_FAILURE;
        processManager.setSystemState(oam::MAN_OFFLINE);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }

    // pthread_exit is called outside the try block above, not from inside its handlers
    if ( exitThread ) {
        pthread_detach (ThreadId);
        pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(exitThreadStatus)));
    }

    if (systemstatus.SystemOpState == AUTO_OFFLINE)
        processManager.setSystemState(oam::AUTO_INIT);
    else
        processManager.setSystemState(oam::MAN_INIT);

    //validate the dbroots assignments
    //make sure no 1 ID is assigned to 2 PMs
    //and a dbroot not assigned to a DISABLED PM
    try
    {
        systemStorageInfo_t t;
        t = oam.getStorageConfig();

        // two copies of the same list so every pair of PMs can be compared
        DeviceDBRootList moduledbrootlist1 = boost::get<2>(t);
        DeviceDBRootList moduledbrootlist2 = boost::get<2>(t);

        DeviceDBRootList::iterator pt1 = moduledbrootlist1.begin();
        for( ; pt1 != moduledbrootlist1.end() ; pt1++)
        {
            string moduleID1 = oam.itoa((*pt1).DeviceID);
            string moduleName = "pm" + moduleID1;

            // check DISABLED modules
            int opState = oam::ACTIVE;
            bool degraded;
            try{
                oam.getModuleStatus(moduleName, opState, degraded);
            }
            catch (...)
            {
                // status unavailable: skip validation of this module
                continue;
            }

            //check if disabled
            if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) {
                if ( (*pt1).dbrootConfigList.size() != 0 ) {
                    //issue log and Set the alarm
                    log.writeLog(__LINE__, "startSystemThread failed: Disabled Module '" + moduleName + "' has DBRoots assigned to it", LOG_TYPE_CRITICAL);
                    aManager.sendAlarmReport(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
                    startsystemthreadStatus = oam::API_FAILURE;
                    processManager.setSystemState(oam::FAILED);
                    pthread_detach (ThreadId);
                    pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(oam::API_FAILURE)));
                }
                continue;
            }

            // if module has no dbroots assigned, fail startSystem
            if ( (*pt1).dbrootConfigList.size() == 0 ) {
                //issue log and Set the alarm
                log.writeLog(__LINE__, "startSystemThread failed: Module '" + moduleName + "' has no DBRoots assigned to it", LOG_TYPE_CRITICAL);
                aManager.sendAlarmReport(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
                startsystemthreadStatus = oam::API_FAILURE;
                processManager.setSystemState(oam::FAILED);
                pthread_detach (ThreadId);
                pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(oam::API_FAILURE)));
            }

            DBRootConfigList::iterator pt1a = (*pt1).dbrootConfigList.begin();
            for( ; pt1a != (*pt1).dbrootConfigList.end() ; pt1a++)
            {
                DeviceDBRootList::iterator pt2 = moduledbrootlist2.begin();
                for( ; pt2 != moduledbrootlist2.end() ; pt2++)
                {
                    string moduleID2 = oam.itoa((*pt2).DeviceID);

                    // never compare a PM against itself
                    if ( moduleID1 == moduleID2 )
                        continue;

                    DBRootConfigList::iterator pt2a = (*pt2).dbrootConfigList.begin();
                    for( ; pt2a != (*pt2).dbrootConfigList.end() ; pt2a++)
                    {
                        if ( *pt1a == *pt2a) {
                            log.writeLog(__LINE__, "ERROR: DBRoot ID " + oam.itoa(*pt1a) + " configured on 2 pms: 'pm" + moduleID1 + "' and 'pm" + moduleID2 + "'", LOG_TYPE_CRITICAL);
                            //Set the alarm
                            aManager.sendAlarmReport(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
                            startsystemthreadStatus = oam::API_FAILURE;
                            processManager.setSystemState(oam::FAILED);
                            pthread_detach (ThreadId);
                            pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(oam::API_FAILURE)));
                        }
                    }
                }
            }
        }
    }
    catch (exception&)
    {
        // Validation is best effort: a std::exception here skips it.
        // NOTE(review): deliberately not widened to catch(...), because this try
        // block contains pthread_exit calls and a catch-all that does not rethrow
        // may interfere with thread exit on some platforms - confirm before changing.
    }

    try{
        oam.getSystemConfig(systemmoduletypeconfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        startsystemthreadStatus = oam::API_FAILURE;
        processManager.setSystemState(oam::FAILED);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }
    catch(...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
        startsystemthreadStatus = oam::API_FAILURE;
        processManager.setSystemState(oam::FAILED);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }

    if ( exitThread ) {
        pthread_detach (ThreadId);
        pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(exitThreadStatus)));
    }

    if (systemstatus.SystemOpState == AUTO_OFFLINE)
        processManager.setSystemState(oam::AUTO_INIT);
    else
        processManager.setSystemState(oam::MAN_INIT);

    startsystemthreadRunning = true;

    string newStandbyModule = processManager.getStandbyModule();
    if ( !newStandbyModule.empty() && newStandbyModule != "NONE")
        processManager.setStandbyModule(newStandbyModule);

    //update workernode section
    processManager.updateWorkerNodeconfig();

    //configure PMS ports
    if ( processManager.updatePMSconfig() != API_SUCCESS ) {
        startsystemthreadStatus = oam::API_FAILURE;
        processManager.setSystemState(oam::FAILED);
        pthread_detach (ThreadId);
        pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(oam::API_FAILURE)));
    }

    if ( devicenetworklist.size() != 0 ) {
        //distribute config file
        processManager.distributeConfigFile("system");

        // start modules from devicenetworklist
        DeviceNetworkList::iterator listPT = devicenetworklist.begin();

        //launch start module threads, starting with local module
        pthread_t startmodulethread;
        string moduleName = config.moduleName();
        int rc = pthread_create (&startmodulethread, NULL, (void*(*)(void*)) &startModuleThread, &moduleName);

        if ( rc != 0 )
            log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

        sleep(5);

        for( ; listPT != devicenetworklist.end() ; listPT++)
        {
            string moduleName = (*listPT).DeviceName;

            // skip local module name
            if ( moduleName == config.moduleName() )
                continue;

            // bypass DISABLED modules
            try{
                int opState = oam::ACTIVE;
                bool degraded;
                oam.getModuleStatus(moduleName, opState, degraded);

                if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                    //skip
                    continue;
            }
            catch (...)
            {
                // status unavailable: still try to start the module
            }

            pthread_t startmodulethread;
            int rc = pthread_create (&startmodulethread, NULL, (void*(*)(void*)) &startModuleThread, &moduleName);

            if ( rc != 0 )
                log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

            sleep(5);
        }
    }
    else {
        // start all modules, like on a systemStart command
        //launch start module threads, starting with local module

        if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM )
        {
            try {
                oam.setSystemConfig("PrimaryUMModuleName", config.OAMParentName());
            }
            catch(...) {}

            processManager.setPMProcIPs(config.OAMParentName());
        }

        //distribute config file
        processManager.distributeConfigFile("system");

        pthread_t startmodulethread;
        string moduleName = config.moduleName();
        int rc = pthread_create (&startmodulethread, NULL, (void*(*)(void*)) &startModuleThread, &moduleName);

        if ( rc != 0 )
            log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

        sleep(5);

        for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
            if( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                string moduleName = (*pt).DeviceName;

                // skip local module name
                if ( moduleName == config.moduleName() )
                    continue;

                // bypass DISABLED modules
                try{
                    int opState = oam::ACTIVE;
                    bool degraded;
                    oam.getModuleStatus(moduleName, opState, degraded);

                    if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                        //skip
                        continue;
                }
                catch (...)
                {
                    // status unavailable: still try to start the module
                }

                //setup primary User Module, DML/DDL only start on this module
                if ( moduleName.find("um") == 0 && config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM)
                {
                    string PrimaryUMModuleName;
                    try {
                        oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
                    }
                    catch(...) {}

                    if ( PrimaryUMModuleName == oam::UnassignedName )
                    {
                        try {
                            oam.setSystemConfig("PrimaryUMModuleName", moduleName);
                        }
                        catch(...) {}

                        processManager.setPMProcIPs(moduleName);

                        //distribute config file
                        processManager.distributeConfigFile("system");
                    }
                }

                pthread_t startmodulethread;
                string name = moduleName;
                int rc = pthread_create (&startmodulethread, NULL, (void*(*)(void*)) &startModuleThread, &name);

                if ( rc != 0 )
                    log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

                if ( !HDFS )
                    sleep(5);
                else
                    //usleep(100000);
                    sleep(1);
            }
        }
    }

    // check status and process accordingly
    int k = 0;
    for( ; k < 1200 ; k++ )
    {
        if ( startsystemthreadStop ) {
            log.writeLog(__LINE__, "startSystemThread exit early, startsystemthreadStop set", LOG_TYPE_DEBUG);

            if ( startmodulethreadStatus != API_SUCCESS ) {
                // a module thread reported a major failure
                startsystemthreadStatus = startmodulethreadStatus;
                processManager.setSystemState(oam::FAILED);
            }
            else
            {
                startsystemthreadStatus = API_FAILURE;
                processManager.setSystemState(oam::MAN_OFFLINE);
            }

            startsystemthreadRunning = false;
            pthread_detach (ThreadId);
            pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(oam::API_FAILURE)));
        }

        string moduleName;
        status = API_SUCCESS;

        for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
            if( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                moduleName = (*pt).DeviceName;

                // get module status
                try{
                    int opState = oam::ACTIVE;
                    bool degraded;
                    oam.getModuleStatus(moduleName, opState, degraded);

                    if ( opState == oam::FAILED ) {
                        if ( startmodulethreadStatus != API_SUCCESS )
                            status = startmodulethreadStatus;
                        else
                            status = API_FAILURE;
                        break;
                    }

                    // MAN_OFFLINE counts as settled only after the first pass (k > 0)
                    if (opState == oam::ACTIVE ||
                        opState == oam::MAN_DISABLED ||
                        opState == oam::AUTO_DISABLED ||
                        (opState == oam::MAN_OFFLINE && k > 0) )
                        //skip
                        continue;

                    // any other state: the module is still coming up
                    status = API_ALREADY_IN_PROGRESS;
                }
                catch (...)
                {
                    // status unavailable: ignore this module for this pass
                    continue;
                }
            }

            if( status == API_FAILURE )
                break;
        }

        //get out of loop if all modules started successfully
        if( status == API_SUCCESS ) {
            //send message to start new Standby Process-Manager, if needed
            string newStandbyModule = processManager.getStandbyModule();
            if ( !newStandbyModule.empty() && newStandbyModule != "NONE") {
                // get standby IP address and update entries
                processManager.setStandbyModule(newStandbyModule);

                //distribute config file
                processManager.distributeConfigFile("system");
            }
            break;
        }
        else
        {
            //get out of loop if start module failed
            if( status == API_FAILURE ) {
                //set system status
                log.writeLog(__LINE__, "startSystemThread: Module failed, Set System State to FAILED: " + moduleName , LOG_TYPE_CRITICAL);
                processManager.setSystemState(oam::FAILED);
                break;
            }
        }

        sleep(5);
    }

    if ( k == 1200 ) {
        // system didn't Successfully restart
        log.writeLog(__LINE__, "startSystemThread: Modules failed to start after 1200 tries, Set System State to FAILED" , LOG_TYPE_CRITICAL);
        processManager.setSystemState(oam::FAILED);
        status = oam::API_FAILURE;
    }

    // Bug 4554: Wait until DMLProc is finished with rollback
    if (status == oam::API_SUCCESS)
    {
        BRM::DBRM dbrm;
        uint16_t rtn = 0;
        bool bfirst = true;
        SystemProcessStatus systemprocessstatus;

        string PrimaryUMModuleName;
        try {
            oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
        }
        catch(...) {}

        // Without a primary UM module there is no DMLProc to wait for. The
        // unassigned sentinel is rejected as well as the empty string, and the
        // failure is published instead of the API_SUCCESS value 'status' still
        // holds at this point.
        if ( PrimaryUMModuleName.empty() || PrimaryUMModuleName == oam::UnassignedName )
        {
            log.writeLog(__LINE__, "startSystemThread: Failed, PrimaryUMModuleName is unassigned", LOG_TYPE_CRITICAL);
            log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG);
            processManager.setSystemState(oam::FAILED);
            startsystemthreadStatus = oam::API_FAILURE;
            startsystemthreadRunning = false;
            pthread_detach (ThreadId);
            pthread_exit(0);
        }

        // waiting until dml are ACTIVE, then mark system ACTIVE
        while (rtn == 0)
        {
            ProcessStatus DMLprocessstatus;
            try {
                oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus);
            }
            catch (...)
            {
                // status unavailable: poll again after the sleep below
            }

            if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) {
                // log the wait only once
                if (bfirst)
                {
                    log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback" , LOG_TYPE_INFO);
                    bfirst = false;
                }
            }

            if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) {
                rtn = oam::ACTIVE;
                break;
            }

            if (DMLprocessstatus.ProcessOpState == oam::FAILED) {
                rtn = oam::FAILED;
                break;
            }

            // wait some more
            sleep(2);
        }

        //set query system state ready
        processManager.setQuerySystemState(true);

        //run command to build system table if they don't already exist
        sleep(5);
        int ret = processManager.buildSystemTables("pm1");
        if (ret == oam::API_SUCCESS )
            log.writeLog(__LINE__, "System Catalog Successfully Built by ProcMgr", LOG_TYPE_DEBUG);
        else
            log.writeLog(__LINE__, "System Catalog Successfully not built by ProcMgr, ret code = " + oam.itoa(ret), LOG_TYPE_DEBUG);

        // NOTE(review): when DMLProc reported FAILED the system state becomes
        // FAILED here, yet 'status' (published below) is still API_SUCCESS -
        // confirm whether callers expect a failure status in that case.
        processManager.setSystemState(rtn);
    }

    // exit thread
    log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG);
    startsystemthreadStatus = status;
    startsystemthreadRunning = false;
    pthread_detach (ThreadId);
    pthread_exit(0);
}
/*****************************************************************************************
* @brief startModuleThread
*
* purpose: Send Messages to Module Process Monitors to start Processes
*
*****************************************************************************************/
/**
 * Thread body that asks ProcessManager::startModule to start one module,
 * retrying for as long as the call reports a minor failure.
 *
 * The function never returns normally: every path detaches the calling thread
 * and ends in pthread_exit(0). A major failure is published through the
 * file-level variables startmodulethreadStatus and startsystemthreadStop.
 *
 * @param module  name of the module to start
 */
void startModuleThread(string module)
{
    // work on a private copy of the argument
    string moduleName = module;
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;

    const pthread_t self = pthread_self();

    if ( moduleName.empty() )
    {
        log.writeLog(__LINE__, "startModuleThread received on invalid module name", LOG_TYPE_ERROR);
        pthread_detach (self);
        pthread_exit(0);
    }

    log.writeLog(__LINE__, "Start Module " + moduleName, LOG_TYPE_DEBUG);

    // becomes true once a status read has let a start attempt through
    bool attempted = false;

    for (;;)
    {
        // start type requested unless the module turns out to be in an AUTO state
        uint16_t startType = oam::MAN_OFFLINE;

        try
        {
            int opState = oam::ACTIVE;
            bool degraded;
            oam.getModuleStatus(moduleName, opState, degraded);

            const bool autoState = (opState == oam::AUTO_OFFLINE) || (opState == oam::AUTO_INIT);
            if ( autoState )
                startType = oam::AUTO_OFFLINE;

            // nothing (more) to do for an active or disabled module, or for one
            // that is MAN_OFFLINE after an attempt has already been made
            const bool settled = (opState == oam::ACTIVE) ||
                                 (opState == oam::MAN_DISABLED) ||
                                 (opState == oam::AUTO_DISABLED) ||
                                 (opState == oam::MAN_OFFLINE && attempted);
            if ( settled )
                break;

            attempted = true;
        }
        catch (...)
        {
            // status unavailable: go ahead with the start attempt
        }

        if ( startsystemthreadStop )
        {
            // set status and exit this thread
            processManager.setModuleState(moduleName, oam::MAN_OFFLINE);
            log.writeLog(__LINE__, "startModuleThread early exit on " + moduleName, LOG_TYPE_DEBUG);
            pthread_detach (self);
            pthread_exit(0);
        }

        const int rc = processManager.startModule(moduleName, oam::FORCEFUL, startType, true);
        log.writeLog(__LINE__, "ACK received from '" + moduleName + "' Process-Monitor, return status = " + oam.itoa(rc), LOG_TYPE_DEBUG);

        if ( rc == API_SUCCESS )
            break;

        // a minor failure is retried
        if ( rc == API_MINOR_FAILURE )
            continue;

        //major failure, set stopsystem flag and exit this thread
        startmodulethreadStatus = rc;
        startsystemthreadStop = true;
        break;
    }

    // exit thread
    log.writeLog(__LINE__, "startModuleThread Exit on " + moduleName, LOG_TYPE_DEBUG);
    pthread_detach (self);
    pthread_exit(0);
}
/*****************************************************************************************
* @brief stopSystemThread
*
* purpose: Send Messages to Module Process Monitors to stop Processes
*
*****************************************************************************************/
/**
 * Thread body that stops the system: launches one stopModuleThread per module,
 * polls the module states, then records the resulting system state.
 *
 * The function never returns normally: every path detaches the calling thread
 * and ends in pthread_exit(). The result is published through the file-level
 * variable stopsystemthreadStatus.
 *
 * @param Devicenetworklist  modules to stop; when empty, every module found in
 *                           the system configuration is stopped
 *
 * NOTE(review): stopModuleThread is declared as taking a string by value but is
 * launched through a cast to void*(*)(void*) with the address of a local
 * string. The sleeps that follow pthread_create appear to be what keeps that
 * local alive until the new thread has copied it - confirm before changing the
 * timing or the scoping of those locals.
 */
void stopSystemThread(oam::DeviceNetworkList Devicenetworklist)
{
    oam::DeviceNetworkList devicenetworklist = Devicenetworklist;
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;
    SystemModuleTypeConfig systemmoduletypeconfig;
    ALARMManager aManager;
    int status = API_SUCCESS;
    bool exitThread = false;
    int exitThreadStatus = oam::API_SUCCESS;

    pthread_t ThreadId;
    ThreadId = pthread_self();

    log.writeLog(__LINE__, "stopSystemThread launched", LOG_TYPE_DEBUG);

    try{
        oam.getSystemConfig(systemmoduletypeconfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        stopsystemthreadStatus = oam::API_FAILURE;
        processManager.setSystemState(oam::FAILED);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }
    catch(...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
        stopsystemthreadStatus = oam::API_FAILURE;
        processManager.setSystemState(oam::FAILED);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }

    // Honor the exit request set by the handlers above (startSystemThread does
    // the same). Without this check the code below would overwrite the FAILED
    // state and API_FAILURE status just recorded with MAN_OFFLINE / API_SUCCESS.
    if ( exitThread ) {
        pthread_detach (ThreadId);
        pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(exitThreadStatus)));
    }

    if ( devicenetworklist.size() != 0 ) {
        // stop modules from devicenetworklist
        DeviceNetworkList::iterator listPT = devicenetworklist.begin();

        //launch stop module threads, starting with local module
        pthread_t stopmodulethread;
        string moduleName = config.moduleName();
        int rc = pthread_create (&stopmodulethread, NULL, (void*(*)(void*)) &stopModuleThread, &moduleName);

        if ( rc != 0 )
            log.writeLog(__LINE__, "stopModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

        // NOTE(review): unlike startSystemThread, the local module is not skipped
        // in this loop, so it gets a second stop thread when it is in the list.
        for( ; listPT != devicenetworklist.end() ; listPT++)
        {
            string moduleName = (*listPT).DeviceName;

            // bypass DISABLED modules
            try{
                // opState is the value that gets compared below, so it is the one initialized
                int opState = oam::ACTIVE;
                bool degraded;
                oam.getModuleStatus(moduleName, opState, degraded);

                if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                    //skip
                    continue;
            }
            catch (...)
            {
                // status unavailable: still try to stop the module
            }

            pthread_t stopmodulethread;
            int rc = pthread_create (&stopmodulethread, NULL, (void*(*)(void*)) &stopModuleThread, &moduleName);

            if ( rc != 0 )
                log.writeLog(__LINE__, "stopModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

            sleep(5);
        }
    }
    else {
        // stop all modules found in the system configuration
        //launch stop module threads
        for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
            if( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                string moduleName = (*pt).DeviceName;

                // bypass DISABLED modules
                try{
                    int opState = oam::ACTIVE;
                    bool degraded;
                    oam.getModuleStatus(moduleName, opState, degraded);

                    if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                        //skip
                        continue;
                }
                catch (...)
                {
                    // status unavailable: still try to stop the module
                }

                pthread_t stopmodulethread;
                string name = moduleName;
                int rc = pthread_create (&stopmodulethread, NULL, (void*(*)(void*)) &stopModuleThread, &name);

                if ( rc != 0 )
                    log.writeLog(__LINE__, "stopModuleThread: pthread_create failed, return status = " + oam.itoa(rc), LOG_TYPE_ERROR);

                usleep(50000);
            }
        }
    }

    // check status and process accordingly
    // NOTE(review): unlike startSystemThread, nothing below marks a module that
    // is still running as "in progress", so this loop leaves on its first pass
    // unless a module reports FAILED. Left unchanged because waiting on every
    // configured module would also affect stops of a module subset - confirm
    // the intended behavior.
    int k = 0;
    for( ; k < 1200 ; k++ )
    {
        string moduleName;
        status = API_SUCCESS;

        for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
            if( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                moduleName = (*pt).DeviceName;

                // get module status
                try{
                    int opState = oam::ACTIVE;
                    bool degraded;
                    oam.getModuleStatus(moduleName, opState, degraded);

                    if ( opState == oam::FAILED ) {
                        status = API_FAILURE;
                        break;
                    }

                    if (opState == oam::MAN_DISABLED ||
                        opState == oam::AUTO_DISABLED ||
                        opState == oam::MAN_OFFLINE)
                        //skip
                        continue;
                }
                catch (...)
                {
                    // status unavailable: ignore this module for this pass
                    continue;
                }
            }

            if( status == API_FAILURE )
                break;
        }

        //get out of loop if all modules stopped successfully
        if( status == API_SUCCESS ) {
            break;
        }
        else
        {
            //get out of loop if stop module failed
            if( status == API_FAILURE ) {
                //set system status
                log.writeLog(__LINE__, "stopSystemThread: Module failed, Set System State to FAILED: " + moduleName , LOG_TYPE_CRITICAL);
                processManager.setSystemState(oam::FAILED);
                break;
            }
        }

        sleep(5);
    }

    if ( k == 1200 ) {
        // system didn't successfully stop
        log.writeLog(__LINE__, "stopSystemThread: Modules failed to stop after 1200 tries, Set System State to FAILED" , LOG_TYPE_CRITICAL);
        processManager.setSystemState(oam::FAILED);
        status = oam::API_FAILURE;
    }
    else
    {
        processManager.setSystemState(oam::MAN_OFFLINE);
        status = oam::API_SUCCESS;
    }

    // exit thread
    stopsystemthreadStatus = status;
    log.writeLog(__LINE__, "stopSystemThread Exit", LOG_TYPE_DEBUG);
    pthread_detach (ThreadId);
    pthread_exit(0);
}
/*****************************************************************************************
* @brief stopModuleThread
*
* purpose: Send Messages to Module Process Monitors to stop Processes
*
*****************************************************************************************/
/**
 * Thread body that asks ProcessManager::stopModule to stop one module,
 * retrying for as long as the call reports a minor failure.
 *
 * The function never returns normally: every path detaches the calling thread
 * and ends in pthread_exit(0).
 *
 * @param module  name of the module to stop
 */
void stopModuleThread(string module)
{
    // work on a private copy of the argument
    string moduleName = module;
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;

    const pthread_t self = pthread_self();

    if ( moduleName.empty() )
    {
        log.writeLog(__LINE__, "stopModuleThread received on invalid module name", LOG_TYPE_ERROR);
        pthread_detach (self);
        pthread_exit(0);
    }

    log.writeLog(__LINE__, "Stop Module " + moduleName, LOG_TYPE_DEBUG);

    for (;;)
    {
        // a module already reported as MAN_OFFLINE needs no stop request
        try
        {
            int opState = oam::ACTIVE;
            bool degraded;
            oam.getModuleStatus(moduleName, opState, degraded);

            if ( opState == oam::MAN_OFFLINE )
                break;
        }
        catch (...)
        {
            // status unavailable: go ahead with the stop request
        }

        const int rc = processManager.stopModule(moduleName, oam::GRACEFUL, true);
        log.writeLog(__LINE__, "ACK received from '" + moduleName + "' Process-Monitor, return status = " + oam.itoa(rc), LOG_TYPE_DEBUG);

        // success and major failure both end the thread; only a minor failure is retried
        if ( rc == API_SUCCESS || rc != API_MINOR_FAILURE )
            break;
    }

    // exit thread
    log.writeLog(__LINE__, "stopModuleThread Exit on " + moduleName, LOG_TYPE_DEBUG);
    pthread_detach (self);
    pthread_exit(0);
}
/*****************************************************************************************
* @brief checkSimplexModule
*
* purpose: Check for simplex module run-type and start mate processes if needed
*
*****************************************************************************************/
void ProcessManager::checkSimplexModule(std::string moduleName)
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
SystemModuleTypeConfig systemmoduletypeconfig;
SystemProcessConfig systemprocessconfig;
log.writeLog(__LINE__, "checkSimplexModule called for " + moduleName, LOG_TYPE_DEBUG);
try{
oam.getSystemConfig(systemmoduletypeconfig);
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
return;
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
return;
}
string moduletype = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
for( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
{
if ( moduletype == systemmoduletypeconfig.moduletypeconfig[i].ModuleType ) {
if( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0)
return;
//check for SIMPLEX Processes on mate might need to be started
if( systemmoduletypeconfig.moduletypeconfig[i].RunType == SIMPLEX ) {
DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
for( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
{
if ((*pt).DeviceName != moduleName) {
//mate module, check for module ACTIVE and SIMPLEX processes
int opState = oam::ACTIVE;
try{
bool degraded;
oam.getModuleStatus((*pt).DeviceName, opState, degraded);
if (opState == oam::ACTIVE ||
opState == oam::DEGRADED ) {
//start COLD_STANDBY processes
try {
oam.getProcessConfig(systemprocessconfig);
for( unsigned int j = 0 ; j < systemprocessconfig.processconfig.size(); j++)
{
if ( systemprocessconfig.processconfig[j].ModuleType == moduletype &&
systemprocessconfig.processconfig[j].RunType == oam::SIMPLEX ) {
int state = oam::ACTIVE;
try{
ProcessStatus procstat;
oam.getProcessStatus(systemprocessconfig.processconfig[j].ProcessName,
(*pt).DeviceName, procstat);
state = procstat.ProcessOpState;
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
continue;
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
continue;
}
if ( state == oam::COLD_STANDBY ) {
//set Primary UM Module
if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) {
oam.setSystemConfig("PrimaryUMModuleName", (*pt).DeviceName);
//distribute config file
distributeConfigFile("system");
sleep(2);
//add MySQL Replication setup, if needed
log.writeLog(__LINE__, "Setup MySQL Replication for COLD_STANDBY DMLProc going ACTIVE", LOG_TYPE_DEBUG);
oam::DeviceNetworkList devicenetworklist;
processManager.setMySQLReplication(devicenetworklist, (*pt).DeviceName);
}
int status = processManager.startProcess((*pt).DeviceName,
systemprocessconfig.processconfig[j].ProcessName,
FORCEFUL);
if ( status == API_SUCCESS ) {
log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG);
//check to see if DDL/DML IPs need to be updated
if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc" )
setPMProcIPs((*pt).DeviceName);
}
else
log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG);
}
else
{ // if found ACTIVE, skip to next process
if ( state == oam::ACTIVE )
return;
}
}
}
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "checkSimplexModule: EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "checkSimplexModule: EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
}
}
}
}
}
return;
}
/******************************************************************************************
* @brief updatePMSconfig
*
* purpose: Update PMS Configuration in System Configuration file
*
******************************************************************************************/
int ProcessManager::updatePMSconfig( bool check )
{
Oam oam;
int minPmPorts = 32;
vector<string> IpAddrs;
vector<int> nicIDs;
log.writeLog(__LINE__, "updatePMSconfig Started", LOG_TYPE_DEBUG);
pthread_mutex_lock(&THREAD_LOCK);
ModuleTypeConfig moduletypeconfig;
oam.getSystemConfig("pm", moduletypeconfig);
Config* sysConfig = Config::makeConfig();
string pmsIPAddr = sysConfig->getConfig("PMS1", "IPAddr");
//exit out if PMS already setup
if( pmsIPAddr != oam::UnassignedIpAddr &&
check)
{
log.writeLog(__LINE__, "updatePMSconfig: no update needed, exiting function", LOG_TYPE_DEBUG);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
//exit out if PM module count is 1 or less
if( moduletypeconfig.ModuleCount <= 1 &&
check)
{
log.writeLog(__LINE__, "updatePMSconfig: no update needed, exiting function", LOG_TYPE_DEBUG);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
int maxPMNicID = atoi(sysConfig->getConfig("PrimitiveServers", "ConnectionsPerPrimProc").c_str()) / 2;
int pmCount = 0;
//get Perfomance module IP addresses
DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin();
for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++)
{
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus((*pt).DeviceName, opState, degraded);
if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
continue;
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
pmCount++;
HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
for( ; pt1 != (*pt).hostConfigList.end() ; pt1++)
{
if ( (*pt1).IPAddr == oam::UnassignedIpAddr )
continue;
else
{
//check NIC status and don't assigned if down
try{
int state = oam::UP;
oam.getNICStatus((*pt1).HostName, state);
if ( state == oam::UP || state == oam::INITIAL) {
IpAddrs.push_back((*pt1).IPAddr);
nicIDs.push_back((*pt1).NicID);
}
}
catch (...)
{
IpAddrs.push_back((*pt1).IPAddr);
nicIDs.push_back((*pt1).NicID);
}
}
}
}
if( IpAddrs.empty()) {
log.writeLog(__LINE__, "updatePMSconfig: No up NICS found, exiting function", LOG_TYPE_DEBUG);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
if( pmCount == 0) {
log.writeLog(__LINE__, "updatePMSconfig: No PM modules Enabled, exiting function", LOG_TYPE_DEBUG);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
if( pmCount == 1 &&
pmsIPAddr != oam::UnassignedIpAddr &&
check )
{
log.writeLog(__LINE__, "updatePMSconfig: no update needed, exiting function", LOG_TYPE_DEBUG);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
Configuration config;
//retry 5 times loop just in case
for(int i=0 ; i < 5; i++)
{
Config* sysConfig1 = Config::makeConfig();
//update PM count if needed
sysConfig1->setConfig("PrimitiveServers", "Count", oam.itoa(pmCount));
int pmPorts = pmCount * (maxPMNicID*2);
if ( pmPorts < minPmPorts )
pmPorts = minPmPorts;
const string PM = "PMS";
int nicID = 1;
for ( int pmsID = 1; pmsID < pmPorts+1 ; )
{
vector<string>::iterator pt = IpAddrs.begin();
vector<int>::iterator pt1 = nicIDs.begin();
for( ; pt != IpAddrs.end() ; pt++,pt1++)
{
if ( *pt1 == nicID ) {
string pmsName = PM + oam.itoa(pmsID);
sysConfig1->setConfig(pmsName, "IPAddr", *pt);
pmsID++;
}
if ( pmsID > pmPorts )
break;
}
if ( pmsID > pmPorts )
break;
nicID++;
if ( nicID > maxPMNicID )
nicID = 1;
}
//update Columnstore Config table
try {
sysConfig1->write();
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "updatePMSconfig completed", LOG_TYPE_DEBUG);
return API_SUCCESS;
}
catch(...)
{
log.writeLog(__LINE__, "updatePMSconfig - ERROR: sysConfig->write", LOG_TYPE_ERROR);
}
}
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "updatePMSconfig failed", LOG_TYPE_DEBUG);
return API_FAILURE;
}
/******************************************************************************************
* @brief updateWorkerNodeconfig
*
* purpose: Update WorkerNode Configuration in System Configuration file
*
* Writes one DBRM_Worker<n> section (IPAddr, Module, Port) per module that is not
* disabled, with the local module always first, sets DBRM_Controller NumWorkers and
* resets the remaining DBRM_Worker sections up to MAX_MODULE.
*
* @return API_SUCCESS when the configuration was written, and also when reading the
*         system module configuration failed (only logged); API_FAILURE when every
*         write attempt failed
*
******************************************************************************************/
int ProcessManager::updateWorkerNodeconfig()
{
    // Scoped owner of THREAD_LOCK: the mutex is released on every exit path,
    // including exceptions thrown outside the try block below.
    struct ThreadLockGuard
    {
        ThreadLockGuard()
        {
            pthread_mutex_lock(&THREAD_LOCK);
        }
        ~ThreadLockGuard()
        {
            pthread_mutex_unlock(&THREAD_LOCK);
        }
    };

    Oam oam;
    vector <string> module;
    vector <string> ipadr;

    log.writeLog(__LINE__, "updateWorkerNodeconfig Started", LOG_TYPE_DEBUG);

    ThreadLockGuard threadLock;

    //setup current module as work-node #1 by entering it in first
    module.push_back(config.moduleName());

    // get my IP address and update entries
    ModuleConfig moduleconfig;
    oam.getSystemConfig(config.moduleName(), moduleconfig);
    HostConfigList::iterator pt0 = moduleconfig.hostConfigList.begin();
    idbassert(pt0 != moduleconfig.hostConfigList.end());
    ipadr.push_back(pt0->IPAddr);

    SystemModuleTypeConfig systemmoduletypeconfig;

    try
    {
        oam.getSystemConfig(systemmoduletypeconfig);

        for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() )
                // end of list
                break;

            if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount <= 0 )
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
            {
                //skip current module
                if ( (*pt).DeviceName == config.moduleName() )
                    continue;

                int opState = oam::ACTIVE;
                bool degraded = false;

                try
                {
                    oam.getModuleStatus((*pt).DeviceName, opState, degraded);

                    if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                        continue;
                }
                catch (...)
                {
                    // status unavailable: the module is treated as enabled
                }

                // module and ipadr are parallel lists, so a module without any
                // host entry is left out of both instead of dereferencing begin()
                HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();

                if ( pt1 == (*pt).hostConfigList.end() )
                {
                    log.writeLog(__LINE__, "updateWorkerNodeconfig: no host configured for " + (*pt).DeviceName + ", module skipped", LOG_TYPE_ERROR);
                    continue;
                }

                module.push_back((*pt).DeviceName);
                ipadr.push_back((*pt1).IPAddr);
            }
        }
    }
    catch (...)
    {
        log.writeLog(__LINE__, "updateWorkerNodeconfig: getSystemNetworkConfig Failed", LOG_TYPE_ERROR);
        // NOTE(review): success is reported although nothing was updated;
        // kept as-is because callers are not visible from here.
        return API_SUCCESS;
    }

    // NOTE(review): this local is never read; kept because the Configuration
    // constructor is not visible here and may have side effects - confirm and remove.
    Configuration config;

    // up to 4 write attempts
    for ( int i = 1 ; i < 5 ; i++ )
    {
        Config* sysConfig3 = Config::makeConfig();

        //update Columnstore.xml
        sysConfig3->setConfig("DBRM_Controller", "NumWorkers", oam.itoa(module.size()));

        std::vector<std::string>::iterator pt = module.begin();
        std::vector<std::string>::iterator pt1 = ipadr.begin();

        int id = 1;

        for ( ; pt != module.end() ; pt++, pt1++, id++)
        {
            string Section = "DBRM_Worker" + oam.itoa(id);

            sysConfig3->setConfig(Section, "IPAddr", *pt1);
            sysConfig3->setConfig(Section, "Module", *pt);
            sysConfig3->setConfig(Section, "Port", "8700");
        }

        //clear out any leftovers
        for ( ; id < MAX_MODULE ; id++ )
        {
            string Section = "DBRM_Worker" + oam.itoa(id);

            if ( sysConfig3->getConfig(Section, "IPAddr") != oam::UnassignedIpAddr &&
                    !sysConfig3->getConfig(Section, "IPAddr").empty())
                sysConfig3->setConfig(Section, "IPAddr", oam::UnassignedIpAddr);

            // the Module entry holds a name, so compare against the unassigned NAME
            if ( sysConfig3->getConfig(Section, "Module") != oam::UnassignedName &&
                    !sysConfig3->getConfig(Section, "Module").empty())
                sysConfig3->setConfig(Section, "Module", oam::UnassignedName);
        }

        try
        {
            sysConfig3->write();
            log.writeLog(__LINE__, "updateWorkerNodeconfig completed", LOG_TYPE_DEBUG);
            return API_SUCCESS;
        }
        catch (...)
        {
            log.writeLog(__LINE__, "updateWorkerNodeconfig - ERROR: sysConfig->write", LOG_TYPE_ERROR);
        }
    }

    log.writeLog(__LINE__, "updateWorkerNodeconfig failed", LOG_TYPE_DEBUG);
    return API_FAILURE;
}
/******************************************************************************************
* @brief clearModuleAlarms
*
* purpose: Clears all alarms related to a module
*
******************************************************************************************/
void ProcessManager::clearModuleAlarms(std::string moduleName)
{
ALARMManager aManager;
AlarmList alarmList;
aManager.getActiveAlarm (alarmList);
AlarmList::iterator i;
for (i = alarmList.begin(); i != alarmList.end(); ++i)
{
// check if the same fault component on same module
if (moduleName.compare((i->second).getComponentID()) == 0 ||
moduleName.compare((i->second).getSname()) == 0)
{
// match, go clear it
aManager.sendAlarmReport((i->second).getComponentID().c_str(),
(i->second).getAlarmID(),
CLEAR,
(i->second).getSname().c_str(),
"ProcessManager");
}
}
}
/******************************************************************************************
* @brief clearNICAlarms
*
* purpose: Clears all alarms related to a NIC hostName
*
******************************************************************************************/
void ProcessManager::clearNICAlarms(std::string hostName)
{
ALARMManager aManager;
AlarmList alarmList;
aManager.getActiveAlarm (alarmList);
AlarmList::iterator i;
for (i = alarmList.begin(); i != alarmList.end(); ++i)
{
// check if the same fault component on same module
if (hostName.compare((i->second).getComponentID()) == 0)
{
// match, go clear it
aManager.sendAlarmReport((i->second).getComponentID().c_str(),
(i->second).getAlarmID(),
CLEAR,
(i->second).getSname().c_str(),
"ProcessManager");
}
}
}
/******************************************************************************************
* @brief updateExtentMap
*
* purpose: update Extent Map section in Columnstore.xml
*
******************************************************************************************/
bool ProcessManager::updateExtentMap()
{
string fileName = startup::StartUp::installDir() + "/etc/Columnstore.xml";
ifstream oldFile (fileName.c_str());
if (!oldFile) return false;
vector <string> lines;
char line[200];
string buf;
string newLine;
string start = "</Installation>";
string firstComment = "<!--";
string end = "</ExtentMap>";
string lastComment = "-->";
while (oldFile.getline(line, 200))
{
buf = line;
string::size_type pos = buf.find(start,0);
if (pos != string::npos)
{
//output to temp file and skip next line
lines.push_back(buf);
oldFile.getline(line, 200);
buf = line;
pos = buf.find(firstComment,0);
if (pos == string::npos)
{
return true;
}
}
else
{
pos = buf.find(end,0);
if (pos != string::npos)
{
//output to temp file and skip next line
lines.push_back(buf);
oldFile.getline(line, 200);
buf = line;
pos = buf.find(lastComment,0);
if (pos == string::npos)
{
return true;
}
}
else
//output to temp file
lines.push_back(buf);
}
}
oldFile.close();
unlink (fileName.c_str());
ofstream newFile (fileName.c_str());
//create new file
int fd = open(fileName.c_str(), O_RDWR|O_CREAT, 0664);
copy(lines.begin(), lines.end(), ostream_iterator<string>(newFile, "\n"));
newFile.close();
close(fd);
return true;
}
/******************************************************************************************
* @brief makeXMInittab
*
* purpose: Make inittab to auto-launch ProcMon
*
******************************************************************************************/
bool ProcessManager::makeXMInittab(std::string moduleName, std::string systemID, std::string parentOAMModuleHostName)
{
string fileName = startup::StartUp::installDir() + "/local/etc/" + moduleName + "/inittab.calpont";
vector <string> lines;
string init1 = "1" + systemID + ":2345:respawn:" + startup::StartUp::installDir() + "/bin/ProcMon " + parentOAMModuleHostName;
lines.push_back(init1);
unlink (fileName.c_str());
ofstream newFile (fileName.c_str());
//create new file
int fd = open(fileName.c_str(), O_RDWR|O_CREAT, 0664);
copy(lines.begin(), lines.end(), ostream_iterator<string>(newFile, "\n"));
newFile.close();
close(fd);
return true;
}
/******************************************************************************************
* @brief setPMProcIPs
*
* purpose: Updates the Columnstore.xml file for DDL/DMLProc IPs during PM switchover
*
*
******************************************************************************************/
int ProcessManager::setPMProcIPs( std::string moduleName, std::string processName )
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
ModuleConfig moduleconfig;
log.writeLog(__LINE__, "setPMProcIPs called for " + moduleName, LOG_TYPE_DEBUG);
pthread_mutex_lock(&THREAD_LOCK);
if ( processName == oam::UnassignedName || processName == "DDLProc")
{
for ( int i = 1 ; i < 5 ; i ++)
{
//get Module IP address
try
{
oam.getSystemConfig(moduleName, moduleconfig);
HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
string ipAdd = (*pt1).IPAddr;
Config* sysConfig2 = Config::makeConfig();
//check if IP address if different than current value, don't update if it is
if ( sysConfig2->getConfig("DDLProc", "IPAddr") == ipAdd ) {
log.writeLog(__LINE__, "setPMProcIPs for DDLProc: no update needed", LOG_TYPE_DEBUG);
break;
}
sysConfig2->setConfig("DDLProc", "IPAddr", ipAdd);
try {
sysConfig2->write();
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "setPMProcIPs: DDLProc to " + ipAdd, LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "setPMProcIPs - ERROR: sysConfig->write", LOG_TYPE_ERROR);
}
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
}
if ( processName == oam::UnassignedName || processName == "DMLProc")
{
for ( int i = 1 ; i < 5 ; i ++)
{
//get Module IP address
try
{
oam.getSystemConfig(moduleName, moduleconfig);
HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
string ipAdd = (*pt1).IPAddr;
Config* sysConfig2 = Config::makeConfig();
//check if IP address if different than current value, don't update if it is
if ( sysConfig2->getConfig("DMLProc", "IPAddr") == ipAdd ) {
log.writeLog(__LINE__, "setPMProcIPs for DMLProc: no update needed, exiting function", LOG_TYPE_DEBUG);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
sysConfig2->setConfig("DMLProc", "IPAddr", ipAdd);
try {
sysConfig2->write();
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "setPMProcIPs: DMLProc to " + ipAdd, LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "setPMProcIPs - ERROR: sysConfig->write", LOG_TYPE_ERROR);
}
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
}
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
return API_SUCCESS;
}
/******************************************************************************************
* @brief distributeConfigFile
*
* purpose: Distribute Columnstore Config File to system modules
*
******************************************************************************************/
int ProcessManager::distributeConfigFile(std::string name, std::string file)
{
ByteStream msg;
ByteStream::byte requestID = UPDATECONFIGFILE;
Oam oam;
int returnStatus = oam::API_SUCCESS;
log.writeLog(__LINE__, "distributeConfigFile called for " + name + " file = " + file, LOG_TYPE_DEBUG);
string dirName = startup::StartUp::installDir() + "/etc/";
string fileName = dirName + file;
ifstream in (fileName.c_str());
if (!in) {
log.writeLog(__LINE__, "distributeConfigFile failed, file doesn't exist: " + fileName, LOG_TYPE_ERROR);
return oam::API_FAILURE;
}
//skip any file of size 0
in.seekg(0, std::ios::end);
int size = in.tellg();
if ( size == 0 ) {
log.writeLog(__LINE__, "distributeConfigFile failed, file doesn't exist: " + fileName, LOG_TYPE_ERROR);
return oam::API_FAILURE;
}
// distribute using hdfs call, make sure host names are in /etc/pdsh/machines
ifstream in1 ("/etc/pdsh/machines");
if (in1) {
if ( HDFS )
{
if ( name == "system" )
{
string cmd = "pdcp -a -x " + localHostName + " " + fileName + " " + dirName;
int rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) == 0)
{
log.writeLog(__LINE__, "distributeConfigFile using pdcp successful on " + fileName, LOG_TYPE_DEBUG);
return returnStatus;
}
else
{
log.writeLog(__LINE__, "distributeConfigFile using pdcp failed on " + fileName, LOG_TYPE_ERROR);
}
}
else
{
// get module hostname
ModuleConfig moduleconfig;
oam.getSystemConfig(name, moduleconfig);
HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
string hostName = (*pt1).HostName;
string cmd = "pdcp -w " + hostName + " " + fileName + " " + dirName;
int rtnCode = system(cmd.c_str());
if (WEXITSTATUS(rtnCode) == 0)
{
log.writeLog(__LINE__, "distributeConfigFile using pdcp successful on " + fileName, LOG_TYPE_DEBUG);
return returnStatus;
}
else
{
log.writeLog(__LINE__, "distributeConfigFile using pdcp failed on " + fileName, LOG_TYPE_ERROR);
}
}
}
}
//send via tcp messaging
msg << requestID;
msg << fileName;
in.seekg(0, std::ios::beg);
in >> msg;
SystemModuleTypeConfig systemmoduletypeconfig;
try{
oam.getSystemConfig(systemmoduletypeconfig);
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
if ( name == "system" ) {
// send config file to all modules
for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
{
int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;
if( moduleCount == 0)
continue;
DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();
for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
{
//skip local module
if ( (*pt).DeviceName == config.moduleName() )
continue;
//skip if AOS
int opState = oam::ACTIVE;
bool degraded;
try {
oam.getModuleStatus((*pt).DeviceName, opState, degraded);
}
catch(...)
{}
if (opState == oam::AUTO_DISABLED)
continue;
returnStatus = sendMsgProcMon( (*pt).DeviceName, msg, requestID, 0 );
if ( returnStatus == API_SUCCESS)
{
//log the success event
log.writeLog(__LINE__, (*pt).DeviceName + " distributeConfigFile success.", LOG_TYPE_DEBUG);
}
else
{
//log the error event
log.writeLog(__LINE__, (*pt).DeviceName + " distributeConfigFile failed!!", LOG_TYPE_ERROR);
}
}
}
}
else
{
returnStatus = sendMsgProcMon( name, msg, requestID, 0 );
if ( returnStatus == API_SUCCESS)
{
//log the success event
log.writeLog(__LINE__, name + " distributeConfigFile success.", LOG_TYPE_DEBUG);
}
else
{
//log the error event
log.writeLog(__LINE__, name + " distributeConfigFile failed!!", LOG_TYPE_ERROR);
}
}
return returnStatus;
}
/******************************************************************************************
* @brief getDBRMData
*
* purpose: get DBRM Data and send to requester
*
* Replies "initial" when no DBRM current file or no DBRM data files are found.
* Otherwise replies "files", then the number of non-empty files, then the name and
* content of each file (the files listed by "ls <current>_*", the current file and,
* when present, the journal and OID bitmap files), and finally the "files" message
* once more. THREAD_LOCK is held for the duration of the call.
*
* @param fIos        socket of the requester
* @param moduleName  not used by this function
* @return API_SUCCESS when everything was sent, API_FAILURE_DB_ERROR when the current
*         file exists without the OID bitmap file, API_FAILURE when a socket write failed
*
******************************************************************************************/
int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleName)
{
    // Scoped owner of THREAD_LOCK: released on every exit path, including exceptions.
    struct ThreadLockGuard
    {
        ThreadLockGuard()
        {
            pthread_mutex_lock(&THREAD_LOCK);
        }
        ~ThreadLockGuard()
        {
            pthread_mutex_unlock(&THREAD_LOCK);
        }
    };

    // Writes one message to the socket; on failure returns false and fills 'error'
    // with the text to log.
    struct SocketWriter
    {
        static bool send(messageqcpp::IOSocket& sock, ByteStream& bs, string& error)
        {
            try
            {
                sock.write(bs);
                return true;
            }
            catch (exception& ex)
            {
                error = ex.what();
            }
            catch (...)
            {
                error = "Unknow exception";
            }

            return false;
        }
    };

    ByteStream msg;
    Oam oam;
    string writeError;

    ThreadLockGuard threadLock;

    messageqcpp::IOSocket cfIos = fIos;

    string DBRMroot;
    oam.getSystemConfig("DBRMRoot", DBRMroot);

    const string currentFileName = DBRMroot + "_current";
    const string journalFileName = DBRMroot + "_journal";

    string oidFile;
    oam.getSystemConfig("OIDBitmapFile", oidFile);

    ifstream oldFile (currentFileName.c_str());

    if (!oldFile)
    {
        log.writeLog(__LINE__, "getDBRMData: no DBRM current file found, must be initial install", LOG_TYPE_DEBUG);

        msg << "initial";

        if (!SocketWriter::send(cfIos, msg, writeError))
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
            return oam::API_FAILURE;
        }

        return oam::API_SUCCESS;
    }

    // current file found, check for OIDBitmapFile
    ifstream mapFile (oidFile.c_str());

    if (!mapFile)
    {
        // no OIDBitmapFile, with current file, dbrm files are hosed
        log.writeLog(__LINE__, "getDBRMData: DBRM data files error, current file exist without OIDBitmapFile", LOG_TYPE_CRITICAL);
        return oam::API_FAILURE_DB_ERROR;
    }

    // the first line of the current file is the prefix of the DBRM data files
    string currentDbrmFile;
    std::getline(oldFile, currentDbrmFile);

    const string listFileName = startup::StartUp::installDir() + "/local/dbrmfiles";
    unlink(listFileName.c_str());

    const string cmd = "ls " + currentDbrmFile + "_* >> " + listFileName;
    system(cmd.c_str());

    ifstream file (listFileName.c_str());

    if (!file)
    {
        log.writeLog(__LINE__, "getDBRMData: no DBRM files found, must be initial install", LOG_TYPE_DEBUG);

        msg << "initial";

        if (!SocketWriter::send(cfIos, msg, writeError))
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
            return oam::API_FAILURE;
        }

        return oam::API_SUCCESS;
    }

    vector <string> dbrmFiles;
    string buf;

    while (std::getline(file, buf))
        dbrmFiles.push_back(buf);

    file.close();

    if ( dbrmFiles.size() < 1 )
    {
        log.writeLog(__LINE__, "getDBRMData: dbrmFiles size = 0, must be initial install", LOG_TYPE_DEBUG);

        msg << "initial";

        if (!SocketWriter::send(cfIos, msg, writeError))
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
            return oam::API_FAILURE;
        }

        return oam::API_SUCCESS;
    }

    // put oid file and current file in list
    dbrmFiles.push_back(currentFileName);

    ifstream file1 (journalFileName.c_str());

    if (file1)
        dbrmFiles.push_back(journalFileName);

    ifstream file2 (oidFile.c_str());

    if (file2)
        dbrmFiles.push_back(oidFile);

    //type
    msg << "files";

    if (!SocketWriter::send(cfIos, msg, writeError))
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
        return oam::API_FAILURE;
    }

    //remove any file of size 0
    // erase() invalidates the iterator, so continue from the one it returns
    std::vector<std::string>::iterator pt1 = dbrmFiles.begin();

    while ( pt1 != dbrmFiles.end() )
    {
        ifstream in(pt1->c_str());
        in.seekg(0, std::ios::end);
        int size = in.tellg();

        if ( size == 0 )
            pt1 = dbrmFiles.erase(pt1);
        else
            ++pt1;
    }

    ByteStream fcmsg;

    // number of files
    fcmsg << (ByteStream::byte) dbrmFiles.size();

    if (!SocketWriter::send(cfIos, fcmsg, writeError))
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
        return oam::API_FAILURE;
    }

    for ( pt1 = dbrmFiles.begin() ; pt1 != dbrmFiles.end() ; ++pt1 )
    {
        ByteStream fnmsg, fdmsg;

        const string dataFileName = *pt1;
        ifstream in(dataFileName.c_str());

        //skip any file of size 0
        in.seekg(0, std::ios::end);
        int size = in.tellg();

        if ( size == 0 )
            continue;

        in.seekg(0, std::ios::beg);

        log.writeLog(__LINE__, dataFileName, LOG_TYPE_DEBUG);

        // file name first, then the file content
        fnmsg << dataFileName;

        if (!SocketWriter::send(cfIos, fnmsg, writeError))
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
            return oam::API_FAILURE;
        }

        in >> fdmsg;

        if (!SocketWriter::send(cfIos, fdmsg, writeError))
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
            return oam::API_FAILURE;
        }
    }

    // the "files" message is sent once more after the last file
    if (!SocketWriter::send(cfIos, msg, writeError))
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + writeError, LOG_TYPE_ERROR);
        return oam::API_FAILURE;
    }

    return oam::API_SUCCESS;
}
/******************************************************************************************
* @brief switchParentOAMModule
*
* purpose: Switch OAM Parent Module
*
******************************************************************************************/
int ProcessManager::switchParentOAMModule(std::string newActiveModuleName)
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
int returnStatus = oam::API_SUCCESS;
ALARMManager aManager;
log.writeLog(__LINE__, "switchParentOAMModule Function Started", LOG_TYPE_DEBUG);
if ( DBRootStorageType == "internal" && GlusterConfig == "n") {
log.writeLog(__LINE__, "ERROR: DBRootStorageType = internal", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_INVALID_PARAMETER;
}
// set alarm
aManager.sendAlarmReport(newActiveModuleName.c_str(), MODULE_SWITCH_ACTIVE, SET);
//clear run standby flag;
runStandby = false;
int moduleID = atoi(newActiveModuleName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE).c_str());
// update Columnstore.xml entries
string newActiveIPaddr;
try
{
pthread_mutex_lock(&THREAD_LOCK);
//move a newparent dbroot to old parent for balancing
DBRootConfigList residedbrootConfigList;
try
{
oam.getPmDbrootConfig(moduleID, residedbrootConfigList);
if ( residedbrootConfigList.size() > 0 )
{
DBRootConfigList::iterator pt = residedbrootConfigList.begin();
try {
oam.manualMovePmDbroot(newActiveModuleName, oam.itoa(*pt), config.OAMParentName());
}
catch (...)
{
log.writeLog(__LINE__, "ERROR: manualMovePmDbroot Failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
}
}
catch (...)
{
log.writeLog(__LINE__, "ERROR: getPmDbrootConfig Failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
//move dbroot #1 to new parent
try {
oam.manualMovePmDbroot(config.OAMParentName(), "1", newActiveModuleName);
}
catch (...)
{
log.writeLog(__LINE__, "ERROR: manualMovePmDbroot Failed", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
Config* sysConfig4 = Config::makeConfig();
// get new Active address
ModuleConfig moduleconfig;
oam.getSystemConfig(newActiveModuleName, moduleconfig);
HostConfigList::iterator pt2 = moduleconfig.hostConfigList.begin();
newActiveIPaddr = (*pt2).IPAddr;
sysConfig4->setConfig("ProcMgr", "IPAddr", newActiveIPaddr);
sysConfig4->setConfig("ProcStatusControl", "IPAddr", newActiveIPaddr);
sysConfig4->setConfig("DBRM_Controller", "IPAddr", newActiveIPaddr);
// update Parent OAM Module name to current module name
sysConfig4->setConfig("SystemConfig", "ParentOAMModuleName", newActiveModuleName);
// clear Standby OAM Module
sysConfig4->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
sysConfig4->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);
//update Columnstore Config table
try {
sysConfig4->write();
}
catch(...)
{
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
pthread_mutex_unlock(&THREAD_LOCK);
if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM )
{
//set DDL/DMLproc IPs to new module
setPMProcIPs(newActiveModuleName);
//set Primary UM to new module
try {
oam.setSystemConfig("PrimaryUMModuleName", newActiveModuleName);
}
catch(...) {}
}
log.writeLog(__LINE__, "Columnstore.xml entries update to local IP address of " + newActiveIPaddr, LOG_TYPE_DEBUG);
//distribute config file
processManager.distributeConfigFile("system");
sleep(1);
//change master MySQL Replication setup
log.writeLog(__LINE__, "Setup MySQL Replication for new Parent Module during switch-over", LOG_TYPE_DEBUG);
oam::DeviceNetworkList devicenetworklist;
processManager.setMySQLReplication(devicenetworklist, newActiveModuleName, false, false, oam::UnassignedName);
}
catch (exception& ex)
{
pthread_mutex_unlock(&THREAD_LOCK);
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
return API_FAILURE;
}
catch(...)
{
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
return API_FAILURE;
}
//send message to local Process Monitor for OAM Cold Activation
ByteStream msg1;
ByteStream::byte requestID = OAMPARENTCOLD;
msg1 << requestID;
while(true)
{
int returnStatus = sendMsgProcMon( config.moduleName(), msg1, requestID );
log.writeLog(__LINE__, "sent OAM Parent Cold message to local Process-Monitor, status: " + oam.itoa(returnStatus) , LOG_TYPE_DEBUG);
if ( returnStatus == oam::API_SUCCESS)
break;
}
//send message to new Active Process Monitor for OAM Parent Activation
ByteStream msg;
requestID = OAMPARENTACTIVE;
msg << requestID;
while(true)
{
int returnStatus = sendMsgProcMon( newActiveModuleName, msg, requestID );
log.writeLog(__LINE__, "sent OAM Parent Activate message to New Active Process-Monitor, status: " + oam.itoa(returnStatus) , LOG_TYPE_DEBUG);
if ( returnStatus == oam::API_SUCCESS)
break;
}
// start processmanager on new active node
startProcess(newActiveModuleName, "ProcessManager", oam::FORCEFUL);
// clear alarm
aManager.sendAlarmReport(newActiveModuleName.c_str(), MODULE_SWITCH_ACTIVE, CLEAR);
//DOING THIS JUST TO UPDATE THE TIMESTAMP OF THE CALPONT.XML FILE AS A WORK-AROUND FIX
//BECAUSE PROCMON ISN'T READING UPDATES FROM DISK ON HDFS SYSTEMS
if (HDFS)
{
sleep(60);
Config* sysConfig = Config::makeConfig();
try {
sysConfig->write();
}
catch(...)
{
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_FAILURE;
}
}
return returnStatus;
}
/******************************************************************************************
* @brief OAMParentModuleChange
*
* purpose: OAM Parent Module Change-over
* The module will take over running as the OAM Parent module
* after a detected outage
*
*
******************************************************************************************/
int ProcessManager::OAMParentModuleChange()
{
    // Overview:
    //   1. Build the list of peer modules (or the HA address) used as ping witnesses.
    //   2. Loop pinging the current OAM Parent until an outage is confirmed or an
    //      external failover request (startFailOver) is seen.
    //   3. Rewrite Columnstore.xml so that this module is the OAM Parent, distribute
    //      the file, activate the local Process-Monitor/Process-Manager, disable the
    //      failed module(s) and restart the local and remaining modules.
    //
    // Returns API_SUCCESS once the change-over has completed, or API_FAILURE when the
    // Columnstore.xml update fails.
    //
    // Note: the locals 'log' and 'config' below shadow the members of the same name.
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;

    //
    //monitor OAM Parent module for outage
    //
    log.writeLog(__LINE__, "OAMParentModuleChange Function Started", LOG_TYPE_DEBUG);

    // Get Module Info
    SystemModuleTypeConfig systemModuleTypeConfig;

    try
    {
        oam.getSystemConfig(systemModuleTypeConfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
    }
    catch (...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
    }

    string downOAMParentIPAddress;
    string downOAMParentHostname;
    string downOAMParentName = config.OAMParentName();

    //Build module list
    vector<string> moduleNameList;
    vector<string> moduleIPAddrList;

    for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;

        if ( moduleCount == 0 )
            // skip if no modules configured
            continue;

        DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();

        for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
        {
            HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();

            //get parent module IP address
            if ( (*pt).DeviceName == downOAMParentName )
            {
                downOAMParentIPAddress = (*pt1).IPAddr;
                downOAMParentHostname = (*pt1).HostName;
                continue;
            }

            //store the other modules
            if ( (*pt).DeviceName != config.moduleName() )
            {
                moduleNameList.push_back((*pt).DeviceName);
                moduleIPAddrList.push_back((*pt1).IPAddr);
            }
        }
    }

    string HA_IPAddr;

    if ( moduleIPAddrList.empty() )
    {
        //get HA IP Address
        Config* sysConfig = Config::makeConfig();
        HA_IPAddr = sysConfig->getConfig("ProcMgr_HA", "IPAddr");

        log.writeLog(__LINE__, "Get HA_IPAddr = " + HA_IPAddr, LOG_TYPE_DEBUG);

        if ( !HA_IPAddr.empty() )
        {
            moduleNameList.push_back("HA_device");
            moduleIPAddrList.push_back(HA_IPAddr);
        }
    }

    // Initialised so that a failed lookup below no longer leaves the ping-failure
    // threshold indeterminate.
    // NOTE(review): 3 is assumed to match the shipped Columnstore.xml default - confirm.
    int ModuleHeartbeatCount = 3;

    try
    {
        oam.getSystemConfig("ModuleHeartbeatCount", ModuleHeartbeatCount);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
    }
    catch (...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
    }

    string cmdLine = "ping ";
    string cmdOption = " -c 1 -w 5 >> /dev/null";

    int pingFailure = 0;
    bool failover = false;
    bool recoveryTest = false;
    int disableCount = 0;
    int noAckCount = 0;
    bool amazonParentRestart = false;

    while (!failover)
    {
        // check if a signal was received to start failover
        if (startFailOver)
        {
            //send notification going from standby to active
            oam.sendDeviceNotification(config.moduleName(), START_STANDBY_TO_MASTER);
            break;
        }

        // perform ping test of Active Parent Module
        string cmd = cmdLine + downOAMParentIPAddress + cmdOption;
        int rtnCode = system(cmd.c_str());

        switch (WEXITSTATUS(rtnCode))
        {
            case 0:
            {
                //Ack ping
                pingFailure = 0;

                if ( noAckCount != 0 )
                    oam.sendDeviceNotification(config.moduleName(), MODULE_UP);

                noAckCount = 0;

                //if Amazon Parent PM is restarting, monitor when back active and take needed actions
                if (amazonParentRestart)
                {
                    log.writeLog(__LINE__, "Amazon Parent pinging, waiting until it's active", LOG_TYPE_DEBUG);
                    sleep(60);

                    while (true)
                    {
                        SystemStatus systemstatus;

                        try
                        {
                            oam.getSystemStatus(systemstatus);
                        }
                        catch (...)
                        {}

                        if (systemstatus.SystemOpState == ACTIVE)
                        {
                            log.writeLog(__LINE__, "System Active, restart needed processes", LOG_TYPE_DEBUG);

                            processManager.restartProcessType("mysql");
                            processManager.restartProcessType("ExeMgr");
                            processManager.restartProcessType("WriteEngineServer");
                            processManager.reinitProcessType("DBRMWorkerNode");
                            sleep(1);
                            processManager.restartProcessType("DDLProc");
                            sleep(1);
                            processManager.restartProcessType("DMLProc");

                            amazonParentRestart = false;
                            break;
                        }

                        sleep(5);
                    }
                }

                sleep(1);
                break;
            }

            default:
            {
                //failed to respond to ping
                pingFailure++;
                log.writeLog(__LINE__, "OAMParentModule ping failure (" + downOAMParentName + ")", LOG_TYPE_WARNING);

                if ( pingFailure >= ModuleHeartbeatCount )
                {
                    bool ack = false;
                    bool noack = false;

                    //check NIC #1 status
                    int sockfd;
                    struct ifreq ifr;

                    sockfd = socket(AF_INET, SOCK_DGRAM, 0);

                    if (sockfd == -1)
                    {
                        // no descriptor was obtained, so there is nothing to close
                        log.writeLog(__LINE__, "Could not get socket to check NIC #1", LOG_TYPE_ERROR);
                        break;
                    }

                    /* get interface name (always NUL-terminated, truncated if too long) */
                    strncpy(ifr.ifr_name, iface_name.c_str(), IFNAMSIZ - 1);
                    ifr.ifr_name[IFNAMSIZ - 1] = '\0';

                    /* Read interface flags */
                    if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0)
                    {
                        // not supported
                        close(sockfd);
                        break;
                    }

                    if (ifr.ifr_flags & IFF_UP)
                    {
                        log.writeLog(__LINE__, "Local Interface is UP", LOG_TYPE_INFO);

                        // any additional devices/modules to test
                        if ( !moduleNameList.empty())
                        {
                            // Active Parent not talking, check other modules or HA IP address
                            for ( int count = 0 ; count <= ModuleHeartbeatCount ; count++ )
                            {
                                vector<string>::iterator pt1 = moduleNameList.begin();
                                vector<string>::iterator pt2 = moduleIPAddrList.begin();

                                for ( ; pt1 != moduleNameList.end() ; pt1++, pt2++)
                                {
                                    string cmd = cmdLine + *pt2 + cmdOption;
                                    int rtnCode = system(cmd.c_str());

                                    switch (WEXITSTATUS(rtnCode))
                                    {
                                        case 0:
                                        {
                                            //Ack ping
                                            log.writeLog(__LINE__, *pt1 + " ping successful", LOG_TYPE_DEBUG);
                                            ack = true;
                                            break;
                                        }

                                        default:
                                        {
                                            // ping failure
                                            log.writeLog(__LINE__, *pt1 + " ping failure", LOG_TYPE_WARNING);
                                            noack = true;

                                            //save module name
                                            if ( *pt1 != "HA_device" )
                                                downModuleList.push_back(*pt1);

                                            break;
                                        }
                                    }

                                    // exit loop if ping was successful
                                    if ( ack )
                                        break;

                                    sleep (2);
                                }

                                // exit loop if ping was successful
                                if ( ack )
                                    break;
                            }
                        }
                        else
                        {
                            // NIC #1 up and no other device to test, proceed with failover
                            failover = true;
                        }
                    }
                    else
                    {
                        log.writeLog(__LINE__, "NIC #1 is DOWN", LOG_TYPE_WARNING);
                        // NIC #1 down, dont switch
                        noack = true;

                        if ( noAckCount == 0 )
                            oam.sendDeviceNotification(config.moduleName(), MODULE_DOWN);

                        noAckCount++;
                    }

                    close(sockfd);

                    //check if all modules are not responding to ping
                    if ( !ack && noack )
                    {
                        // yes, go into hold state by setting local module to cold-state
                        ByteStream msg;
                        ByteStream::byte requestID = OAMPARENTCOLD;
                        msg << requestID;

                        int returnStatus = processManager.sendMsgProcMon( config.moduleName(), msg, requestID );
                        log.writeLog(__LINE__, "sent OAM Parent Cold message to local Process-Monitor, status: " + oam.itoa(returnStatus) , LOG_TYPE_DEBUG);
                    }
                    else
                    {
                        if ( ack && !noack )
                        {
                            // all other modules ACK, only parent failed, proceed with failover
                            failover = true;
                            break;
                        }
                        else
                        {
                            if ( ack && noack && !recoveryTest)
                            {
                                // some other modules ACK, some didn't
                                // try 1 more time to make sure we didn't catch the middle of a LAN recovery
                                recoveryTest = true;
                            }
                            else
                            {
                                if ( ack && noack && recoveryTest)
                                {
                                    // some other modules ACK, some didn't, partial outage, do failover
                                    failover = true;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }

        if ( !failover )
        {
            sleep(5);
            downModuleList.clear();
        }
        else
        {
            // PARENT PM OUTAGE DETECTED
            // check if disable flag is set, if so call the notification API
            string activePmFailoverDisabled;

            try
            {
                oam.getSystemConfig("ActivePmFailoverDisabled", activePmFailoverDisabled);

                if ( activePmFailoverDisabled == "y" )
                {
                    log.writeLog(__LINE__, "ActivePmFailoverDisabled is set, send notication", LOG_TYPE_DEBUG);
                    oam.sendDeviceNotification(downOAMParentName, PM_MASTER_FAILED_DISABLED);
                    failover = false;
                    sleep(5);

                    disableCount++;

                    if ( disableCount > 4 )
                    {
                        //no manually failover has been called, go ahead and do auto-failover
                        //send notification going from standby to active
                        // NOTE(review): 'failover' is still false at this point, so only the
                        // log entry and the notification are produced and monitoring continues.
                        // Confirm whether failover was meant to be set to true here.
                        log.writeLog(__LINE__, "ActivePmFailoverDisabled is set, but no manual action has been taken. Do Auto-Failover", LOG_TYPE_DEBUG);
                        oam.sendDeviceNotification(config.moduleName(), START_STANDBY_TO_MASTER);
                    }
                }
                else
                {
                    //send notification going from standby to active
                    oam.sendDeviceNotification(config.moduleName(), START_STANDBY_TO_MASTER);
                }
            }
            catch (exception& ex)
            {}

            //do amazon failover
            if (amazon && AmazonPMFailover == "n")
            {
                log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG);
                log.writeLog(__LINE__, "*** OAMParentModule outage, AmazonPMFailover not set, wating for instance to restart ***", LOG_TYPE_DEBUG);

                string currentIPAddr = oam.getEC2InstanceIpAddress(downOAMParentHostname);

                if (currentIPAddr == "stopped")
                {
                    // start instance
                    int retryCount = 6; // 1 minutes

                    if ( PMInstanceType == "m2.4xlarge" )
                        retryCount = 15; // 2.5 minutes

                    log.writeLog(__LINE__, "Instance in stopped state, try starting it: " + downOAMParentHostname, LOG_TYPE_DEBUG);

                    int retry = 0;

                    for ( ; retry < retryCount ; retry++ )
                    {
                        if ( oam.startEC2Instance(downOAMParentHostname) )
                        {
                            log.writeLog(__LINE__, "Instance started, sleep for 30 seconds to allow it to fully come up: " + downOAMParentHostname, LOG_TYPE_DEBUG);

                            //delay then get new IP Address
                            sleep(30);
                            // This declaration shadows the outer currentIPAddr, which therefore
                            // keeps the value "stopped" unless every retry fails.
                            string currentIPAddr = oam.getEC2InstanceIpAddress(downOAMParentHostname);

                            if (currentIPAddr == "stopped" || currentIPAddr == "terminated")
                            {
                                log.writeLog(__LINE__, "Instance failed to start (no ip-address), retry: " + downOAMParentHostname, LOG_TYPE_DEBUG);
                            }
                            else
                            {
                                // update the Columnstore.xml with the new IP Address
                                string cmd = "sed -i s/" + downOAMParentIPAddress + "/" + currentIPAddr + "/g " + startup::StartUp::installDir() + "/etc/Columnstore.xml";
                                system(cmd.c_str());

                                // remember the parent IP address in case it changed
                                downOAMParentIPAddress = currentIPAddr;

                                amazonParentRestart = true;
                                break;
                            }
                        }
                        else
                        {
                            log.writeLog(__LINE__, "Instance failed to start, retry: " + downOAMParentHostname, LOG_TYPE_DEBUG);
                            sleep(5);
                        }
                    }

                    if ( retry >= retryCount )
                    {
                        log.writeLog(__LINE__, "Instance failed to start, restart a new instance: " + downOAMParentHostname, LOG_TYPE_DEBUG);
                        currentIPAddr = "terminated";
                    }
                }

                if ( currentIPAddr != "terminated")
                {
                    log.writeLog(__LINE__, "Instance rebooting, monitor", LOG_TYPE_DEBUG);

                    //clear and go monitor again
                    failover = false;
                    amazonParentRestart = true;
                }
                else
                    log.writeLog(__LINE__, "Instance terminated, do standby-active failover", LOG_TYPE_DEBUG);
            }

            if ( DBRootStorageType == "internal" && failover && GlusterConfig == "n")
            {
                log.writeLog(__LINE__, "DBRoot Storage configured for internal, don't do standby-active failover", LOG_TYPE_DEBUG);

                //clear and go monitor again
                failover = false;
            }
        }
    }

    log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG);
    log.writeLog(__LINE__, "*** OAMParentModule outage, OAM Parent Module change-over started ***", LOG_TYPE_DEBUG);

    //run save.brm script
    processManager.saveBRM(true, false);

    //set query system state not ready
    processManager.setQuerySystemState(false);

    gdownActiveOAMModule = downOAMParentName;

    // update Columnstore.xml entries
    string localIPaddr;
    string newStandbyModule = downOAMParentName;
    string standbyIPaddr = downOAMParentIPAddress;

    try
    {
        pthread_mutex_lock(&THREAD_LOCK);

        Config* sysConfig4 = Config::makeConfig();

        // get my IP address
        ModuleConfig moduleconfig;
        oam.getSystemConfig(config.moduleName(), moduleconfig);
        HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
        localIPaddr = (*pt1).IPAddr;

        sysConfig4->setConfig("ProcMgr", "IPAddr", localIPaddr);
        sysConfig4->setConfig("ProcStatusControl", "IPAddr", localIPaddr);
        sysConfig4->setConfig("DBRM_Controller", "IPAddr", localIPaddr);

        // update Parent OAM Module name to current module name
        sysConfig4->setConfig("SystemConfig", "ParentOAMModuleName", config.moduleName());

        // clear Standby OAM Module
        sysConfig4->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
        sysConfig4->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);

        //update Columnstore Config table
        try
        {
            sysConfig4->write();
        }
        catch (...)
        {
            log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
            pthread_mutex_unlock(&THREAD_LOCK);
            return API_FAILURE;
        }

        pthread_mutex_unlock(&THREAD_LOCK);

        //clear run standby flag;
        runStandby = false;

        //sleep, give time for message thread to startup
        sleep(5);

        try
        {
            oam.autoMovePmDbroot(downOAMParentName);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR);
        }

        //distribute config file
        distributeConfigFile("system");

        //re-read config info again
        // NOTE(review): this declares a new object that shadows the function-level 'config'
        // and goes out of scope at the end of this try block; the outer 'config' is not
        // refreshed by it.
        Configuration config;

        oam.setHotStandbyPM(standbyIPaddr);
        log.writeLog(__LINE__, "Columnstore.xml Standby OAM updated : " + newStandbyModule + ":" + standbyIPaddr, LOG_TYPE_DEBUG);

        log.writeLog(__LINE__, "Columnstore.xml entries update to local IP address of " + localIPaddr, LOG_TYPE_DEBUG);
    }
    catch (exception& ex)
    {
        // NOTE(review): when the exception was raised after the unlock above, this unlocks
        // a mutex that is no longer held by this thread.
        pthread_mutex_unlock(&THREAD_LOCK);
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        return API_FAILURE;
    }
    catch (...)
    {
        pthread_mutex_unlock(&THREAD_LOCK);
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
        return API_FAILURE;
    }

    if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM )
    {
        //set DDL/DMLproc IPs to local module
        setPMProcIPs(config.moduleName());

        try
        {
            oam.setSystemConfig("PrimaryUMModuleName", config.moduleName());
        }
        catch (...) {}
    }

    //send message to local Process Monitor for OAM Parent Activation
    ByteStream msg;
    ByteStream::byte requestID = OAMPARENTACTIVE;
    msg << requestID;

    // retried until the local Process-Monitor acknowledges
    while (true)
    {
        int returnStatus = sendMsgProcMon( config.moduleName(), msg, requestID );
        log.writeLog(__LINE__, "sent OAM Parent Activate message to local Process-Monitor, status: " + oam.itoa(returnStatus) , LOG_TYPE_DEBUG);

        if ( returnStatus == oam::API_SUCCESS)
            break;
    }

    //set Process Manager state, will make sure process-monitor status control is working
    while (true)
    {
        try
        {
            ProcessStatus procstat;
            oam.getProcessStatus("ProcessManager", config.moduleName(), procstat);

            int ret = setProcessState(config.moduleName(), "ProcessManager", oam::ACTIVE, 0);

            if ( ret == oam::API_SUCCESS )
            {
                oam.getProcessStatus("ProcessManager", config.moduleName(), procstat);

                if ( procstat.ProcessOpState == oam::ACTIVE )
                    break;
            }
        }
        catch (...)
        {}

        sleep(1);
    }

    //set status to BUSY_INIT while failover is in progress
    processManager.setSystemState(oam::BUSY_INIT);

    // graceful start snmptrap-daemon
    string EnableSNMP = "y";

    try
    {
        oam.getSystemConfig("EnableSNMP", EnableSNMP);
    }
    catch (...)
    {}

    if ( EnableSNMP == "y" )
        startProcess(config.moduleName(), "SNMPTrapDaemon", oam::GRACEFUL);

    // set alarm
    ALARMManager aManager;
    aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, SET);

    //set down Active module to disable state
    disableModule(downOAMParentName, false);

    //do it here to get current processes active faster to process queries faster
    processManager.setProcessStates(downOAMParentName, oam::AUTO_OFFLINE);

    //set other down modules to disable state
    vector<string>::iterator pt1 = downModuleList.begin();

    for ( ; pt1 != downModuleList.end() ; pt1++)
    {
        disableModule(*pt1, false);
        processManager.setProcessStates(*pt1, oam::AUTO_OFFLINE);
    }

    //distribute config file
    distributeConfigFile("system");

    //restart local module
    processManager.stopModule(config.moduleName(), oam::FORCEFUL, true);

    string localModule = config.moduleName();

//  processManager.setModuleState(localModule, oam::AUTO_INIT);

    // localModule outlives the thread because the thread is joined below
    pthread_t startmodulethread;
    int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &localModule);

    if ( status != 0 )
        log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);

    if (status == 0)
    {
        pthread_join(startmodulethread, NULL);
        status = startsystemthreadStatus;
    }

    //restart/reinit processes to force their release of the controller node port
    if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) &&
            ( moduleNameList.size() <= 1 && config.moduleType() == "pm") )
    {
        status = 0;
    }
    else
    {
        processManager.restartProcessType("mysql", localModule);
        processManager.restartProcessType("ExeMgr", localModule);
        processManager.restartProcessType("WriteEngineServer", localModule);
        processManager.reinitProcessType("DBRMWorkerNode");

        //send message to start new Standby Process-Manager, if needed
        newStandbyModule = getStandbyModule();

        if ( !newStandbyModule.empty() && newStandbyModule != downOAMParentName
                && newStandbyModule != "NONE")
        {
            // get standby IP address and update entries
            setStandbyModule(newStandbyModule);
        }

        //send message to each child process to start any COLD_STANDBY processes
        SystemModuleTypeConfig systemmoduletypeconfig;

        try
        {
            oam.getSystemConfig(systemmoduletypeconfig);
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
        }

        for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;

            if ( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++)
            {
                // a failed status lookup leaves the module treated as ACTIVE
                int opState = oam::ACTIVE;
                bool degraded;

                try
                {
                    oam.getModuleStatus((*pt).DeviceName, opState, degraded);
                }
                catch (exception& ex)
                {
                    string error = ex.what();
//                  log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR);
                }
                catch (...)
                {
//                  log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR);
                }

                if (opState != oam::MAN_DISABLED)
                {
                    if (opState != oam::AUTO_DISABLED)
                    {
                        if ((*pt).DeviceName != downOAMParentName )
                        {
                            if ((*pt).DeviceName != config.moduleName() )
                            {
//                              processManager.setModuleState((*pt).DeviceName, oam::AUTO_INIT);

                                // NOTE(review): the thread receives the address of this
                                // block-local string and is neither joined nor detached here;
                                // only the sleep(1) below keeps the string alive while the
                                // thread starts. Confirm startModuleThread copies its argument
                                // immediately.
                                pthread_t startmodulethread;
                                string moduleName = (*pt).DeviceName;
                                int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &moduleName);

                                if ( status != 0 )
                                    log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);

                                sleep(1);
                            }
                        }
                    }
                }
            }
        }
    }

    //restart DDLProc/DMLProc to perform any rollbacks, if needed
    //dont rollback in amazon, wait until down pm recovers
    if ( ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM )
            && !amazon )
    {
        processManager.restartProcessType("DDLProc", config.moduleName());
        sleep(1);
        processManager.restartProcessType("DMLProc", config.moduleName());
    }

    if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM )
    {
        //change master MySQL Replication setup
        log.writeLog(__LINE__, "Setup this node as MySQL Replication Master", LOG_TYPE_DEBUG);

        oam::DeviceNetworkList devicenetworklist;
        processManager.setMySQLReplication(devicenetworklist, config.moduleName(), true);
    }

    //set query system state ready
    processManager.setQuerySystemState(true);

    // clear alarm
    aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, CLEAR);

    //set status to ACTIVE now that the change-over steps are done
    processManager.setSystemState(oam::ACTIVE);

    log.writeLog(__LINE__, "*** Exiting OAMParentModuleChange function ***", LOG_TYPE_DEBUG);

    return API_SUCCESS;
}
/******************************************************************************************
* @brief sendStatusUpdate
*
* purpose: Send Status Update to Process Monitor
*
*
******************************************************************************************/
void ProcessManager::sendStatusUpdate(ByteStream obs, ByteStream::byte returnRequestType)
{
    // Sends 'obs' to the "ProcStatusControl" queue and waits up to 10 seconds for a
    // one-byte status reply; afterwards forwards the same message, without waiting for
    // a reply, to "ProcStatusControlStandby" when a standby address is configured.
    //
    // obs               - message to send
    // returnRequestType - not used by this function
    //
    // Throws std::runtime_error("error") when the reply status is not API_SUCCESS and
    // std::runtime_error("timeout") when no reply arrives or the exchange itself throws.
    // Previously the catch-all below also caught the "error" exception and re-labelled
    // it as "timeout"; the reason is now recorded and thrown after the try block.
    const char* failure = NULL;

    try
    {
        MessageQueueClient processor("ProcStatusControl");
        ByteStream ibs;

        processor.write(obs);

        // wait 10 seconds for ACK from Process Monitor
        struct timespec ts = { 10, 0 };

        ibs = processor.read(&ts);

        if (ibs.length() > 0)
        {
            ByteStream::byte status;
            ibs >> status;

            if ( status != oam::API_SUCCESS )
                failure = "error";
        }
        else
        {
            // timeout occurred
            failure = "timeout";
        }

        // shutdown connection on every path
        processor.shutdown();
    }
    catch (...)
    {
        failure = "timeout";
    }

    if ( failure != NULL )
        throw std::runtime_error(failure);

    // NOTE(review): 'config' is not referenced below; it is kept because any side
    // effects of constructing a Configuration are not visible from this function.
    Configuration config;
    Config* sysConfig5 = Config::makeConfig();

    // nothing more to do when no standby is assigned
    if ( sysConfig5->getConfig("ProcStatusControlStandby", "IPAddr") == oam::UnassignedIpAddr )
        return;

    // best-effort copy to the standby; failures are deliberately ignored
    try
    {
        MessageQueueClient processor("ProcStatusControlStandby");

        processor.write(obs);
        processor.shutdown();
    }
    catch (...)
    {}

    return;
}
/******************************************************************************************
* @brief getStandbyModule
*
* purpose:	find an available hot-standby module based on Process-Manager status, if one exists
*
*
******************************************************************************************/
std::string ProcessManager::getStandbyModule()
{
    // Picks a module that can host the hot-standby Process-Manager.
    //
    // Returns:
    //   ""       - a ProcessManager is already in STANDBY state (non-gluster path)
    //   "NONE"   - no candidate was found
    //   <module> - name of the selected module
    Oam oam;
    SystemProcessStatus systemprocessstatus;
    string backupStandbyModule = "NONE";
    string newStandbyModule = "NONE";

    log.writeLog(__LINE__, "getStandbyModule called", LOG_TYPE_DEBUG);

    //check if gluster, if so then find PMs that have copies of DBROOT #1
    string pmList = "";

    if (GlusterConfig == "y")
    {
        try
        {
            string errmsg;
            oam.glusterctl(oam::GLUSTER_WHOHAS, "1", pmList, errmsg);

            log.writeLog(__LINE__, "GLUSTER_WHOHAS called:" + pmList, LOG_TYPE_DEBUG);

            // pmList is split on spaces; each token is appended to "pm" to form a module name
            boost::char_separator<char> sep(" ");
            boost::tokenizer< boost::char_separator<char> > tokens(pmList, sep);

            for ( boost::tokenizer< boost::char_separator<char> >::iterator it = tokens.begin();
                    it != tokens.end();
                    ++it)
            {
                string pm = "pm" + *it;

                // skip if current module
                if ( pm == config.moduleName() )
                    continue;

                // Initialised so a failed status lookup no longer leaves opState
                // indeterminate; the module is then treated as usable, matching the
                // non-gluster path below.
                int opState = oam::ACTIVE;
                bool degraded;

                try
                {
                    oam.getModuleStatus(pm, opState, degraded);
                }
                catch (...)
                {}

                if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                {
                    continue;
                }
                else
                    return pm;
            }
        }
        catch (...)
        {}

        return "NONE";
    }

    //not gluster, check by status
    try
    {
        oam.getProcessStatus(systemprocessstatus);

        for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
        {
            if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" &&
                    systemprocessstatus.processstatus[i].ProcessOpState == oam::STANDBY )
                //already have a hot-standby
                return "";

            // once a backup candidate is held, only the STANDBY check above still runs
            if ( backupStandbyModule != "NONE" )
                continue;

            if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" &&
                    systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY )
                // Found a ProcessManager in a COLD_STANDBY state
                newStandbyModule = systemprocessstatus.processstatus[i].Module;

            if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" &&
                    systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_OFFLINE &&
                    backupStandbyModule == "NONE" &&
                    newStandbyModule == "NONE" )
            {
                // Found a ProcessManager in a MAN_OFFLINE state, use if no COLD_STANDBY is found
                // and module is not disabled
                int opState = oam::ACTIVE;
                bool degraded;

                try
                {
                    oam.getModuleStatus(systemprocessstatus.processstatus[i].Module, opState, degraded);
                }
                catch (...)
                {}

                if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
                {
                    continue;
                }
                else
                    backupStandbyModule = systemprocessstatus.processstatus[i].Module;
            }
        }
    }
    catch (exception& ex)
    {
//      string error = ex.what();
//      log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
    }
    catch (...)
    {
//      log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
    }

    // a COLD_STANDBY candidate wins over a MAN_OFFLINE one
    if ( newStandbyModule != "NONE" )
        return newStandbyModule;

    return backupStandbyModule;
}
/******************************************************************************************
* @brief setStandbyModule
*
* purpose: set Standby Module info in Columnstore.xml
*
*
******************************************************************************************/
bool ProcessManager::setStandbyModule(std::string newStandbyModule, bool send)
{
    // Records 'newStandbyModule' as the standby OAM module in Columnstore.xml,
    // distributes the file and, when 'send' is true, asks that module to start its
    // ProcessManager with oam::GRACEFUL_STANDBY.
    //
    // Returns true on success or when the name is empty (nothing to do), false after
    // five failed attempts.
    //
    // THREAD_LOCK is held from the first attempt until the config write succeeds. The
    // 'locked' flag records whether it is currently held, so it is released exactly
    // once: previously an exception thrown after the write (when the mutex had already
    // been unlocked) led to further unlocks on the retry and on the failure path.
    Oam oam;

    log.writeLog(__LINE__, "setStandbyModule called", LOG_TYPE_DEBUG);

    if ( newStandbyModule.empty() )
        return true;

    pthread_mutex_lock(&THREAD_LOCK);
    bool locked = true;

    for (int attempt = 0 ; attempt < 5; attempt++)
    {
        try
        {
            if ( !locked )
            {
                // a previous attempt failed after releasing the lock
                pthread_mutex_lock(&THREAD_LOCK);
                locked = true;
            }

            // get standby IP address and update entries
            // (now inside the try so a failed lookup is retried instead of escaping
            // with the mutex still held)
            ModuleConfig moduleconfig;
            oam.getSystemConfig(newStandbyModule, moduleconfig);

            if ( moduleconfig.hostConfigList.empty() )
                throw std::runtime_error("no host entry configured for module " + newStandbyModule);

            string standbyIPaddr = moduleconfig.hostConfigList.begin()->IPAddr;

            // NOTE(review): 'config' is not referenced below; it is kept because any side
            // effects of constructing a Configuration are not visible from this function.
            Configuration config;
            Config* sysConfig6 = Config::makeConfig();

            sysConfig6->setConfig("SystemConfig", "StandbyOAMModuleName", newStandbyModule);
            sysConfig6->setConfig("ProcStatusControlStandby", "IPAddr", standbyIPaddr);

            sysConfig6->write();
            pthread_mutex_unlock(&THREAD_LOCK);
            locked = false;

            oam.setHotStandbyPM(standbyIPaddr);

            //distribute config file
            distributeConfigFile("system");

            log.writeLog(__LINE__, "Columnstore.xml Standby OAM updated to : " + newStandbyModule + ":" + standbyIPaddr, LOG_TYPE_DEBUG);

            if (send)
            {
                log.writeLog(__LINE__, "Send Message for new Hot-Standby ProcessManager to module = " + newStandbyModule, LOG_TYPE_DEBUG);
                int retStatus = startProcess(newStandbyModule, "ProcessManager", oam::GRACEFUL_STANDBY);
                log.writeLog(__LINE__, "Hot-Standby ProcessManager ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG);
            }

            return true;
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "setStandbyModule: EXCEPTION ERROR on sysConfig->write(): " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "setStandbyModule :EXCEPTION ERROR on sysConfig->write(): Caught unknown exception!", LOG_TYPE_ERROR);
        }

        // pause before retrying, as clearStandbyModule() and setEnableState() do
        sleep(1);
    }

    log.writeLog(__LINE__, "setStandbyModule: failed to set standby module", LOG_TYPE_ERROR);

    if ( locked )
        pthread_mutex_unlock(&THREAD_LOCK);

    return false;
}
/******************************************************************************************
* @brief clearStandbyModule
*
* purpose: clear Standby Module info in Columnstore.xml
*
*
******************************************************************************************/
bool ProcessManager::clearStandbyModule()
{
    // Resets the standby OAM module entries in Columnstore.xml to their "unassigned"
    // values and distributes the file.
    //
    // Returns true on success, false after five failed attempts (one second apart).
    //
    // THREAD_LOCK is held from the first attempt until the config write succeeds. The
    // 'locked' flag records whether it is currently held, so it is released exactly
    // once: previously an exception thrown after the write (when the mutex had already
    // been unlocked) led to further unlocks on the retry and on the failure path.
    Oam oam;

    log.writeLog(__LINE__, "clearStandbyModule called", LOG_TYPE_DEBUG);

    pthread_mutex_lock(&THREAD_LOCK);
    bool locked = true;

    // NOTE(review): 'config' is not referenced below; it is kept because any side
    // effects of constructing a Configuration are not visible from this function.
    Configuration config;

    for (int attempt = 0 ; attempt < 5; attempt++)
    {
        try
        {
            if ( !locked )
            {
                // a previous attempt failed after releasing the lock
                pthread_mutex_lock(&THREAD_LOCK);
                locked = true;
            }

            // now inside the try so a failure here is retried instead of escaping
            // with the mutex still held
            Config* sysConfig7 = Config::makeConfig();

            sysConfig7->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
            sysConfig7->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);

            sysConfig7->write();
            pthread_mutex_unlock(&THREAD_LOCK);
            locked = false;

            oam.setHotStandbyPM(" ");

            log.writeLog(__LINE__, "Clear Columnstore.xml Standby OAM", LOG_TYPE_DEBUG);

            //distribute config file
            distributeConfigFile("system");

            return true;
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "clearStandbyModule: EXCEPTION ERROR on sysConfig->write(): " + error, LOG_TYPE_ERROR);
        }
        catch (...)
        {
            log.writeLog(__LINE__, "clearStandbyModule :EXCEPTION ERROR on sysConfig->write(): Caught unknown exception!", LOG_TYPE_ERROR);
        }

        sleep(1);
    }

    log.writeLog(__LINE__, "clearStandbyModule: failed to clear standby module", LOG_TYPE_ERROR);

    if ( locked )
        pthread_mutex_unlock(&THREAD_LOCK);

    return false;
}
/******************************************************************************************
* @brief setEnableState
*
* purpose: set Enable State info in Columnstore.xml
*
*
******************************************************************************************/
int ProcessManager::setEnableState(std::string target, std::string state)
{
Oam oam;
ModuleConfig moduleconfig;
pthread_mutex_lock(&THREAD_LOCK);
for(int i=0 ; i < 5; i++)
{
try
{
oam.getSystemConfig(target, moduleconfig);
moduleconfig.DisableState = state;
try
{
oam.setSystemConfig(target, moduleconfig);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on setSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on setSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
sleep(1);
}
log.writeLog(__LINE__, "setEnableState: failed to set enable state", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
/******************************************************************************************
* @brief sendUpgradeRequest
*
* purpose: send Upgrade Request Msg to all ACTIVE UMs
*
*
******************************************************************************************/
// Thread body: sends a RUNUPGRADE request (with a blank dummy password) to
// ProcMon on every "um" module - and on every "pm" module for a combined
// DM/UM/PM install - whose module status is ACTIVE or DEGRADED.
//
// The result of the most recent send is published in upgradethreadStatus.
// The function never returns normally: it detaches the calling thread and
// ends it with pthread_exit().
void sendUpgradeRequest()
{
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;

    bool exitThread = false;
    int exitThreadStatus = oam::API_SUCCESS;

    pthread_t ThreadId = pthread_self();

    // Wait until WriteEngineServer on this module reports ACTIVE. Poll once a
    // second rather than spinning: the status may take a while to change and
    // a tight loop would burn a full CPU for the whole wait.
    while (true)
    {
        try
        {
            ProcessStatus procstat;
            oam.getProcessStatus("WriteEngineServer", config.moduleName(), procstat);

            if ( procstat.ProcessOpState == oam::ACTIVE)
                break;
        }
        catch (...)
        {
            // status not available yet - deliberately ignored, just retry
        }

        sleep(1);
    }

    SystemModuleTypeConfig systemmoduletypeconfig;

    // pthread_exit() is deliberately not called from inside the handlers
    // below; they only record the failure and the exit happens afterwards.
    try
    {
        oam.getSystemConfig(systemmoduletypeconfig);
    }
    catch (exception& ex)
    {
        string error = ex.what();
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }
    catch (...)
    {
        log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
        exitThread = true;
        exitThreadStatus = oam::API_FAILURE;
    }

    if ( exitThread )
    {
        pthread_detach (ThreadId);
        pthread_exit(reinterpret_cast<void*>(static_cast<ptrdiff_t>(exitThreadStatus)));
    }

    ByteStream msg;
    ByteStream::byte requestID = RUNUPGRADE;

    msg << requestID;
    msg << " ";     // pass a blank dummy password

    int returnStatus = oam::API_SUCCESS;

    for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
    {
        int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;

        if ( moduleCount == 0)
            continue;

        string moduleType = systemmoduletypeconfig.moduletypeconfig[i].ModuleType;

        const bool upgradeTarget =
            moduleType == "um" ||
            ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM );

        if ( !upgradeTarget )
            continue;

        DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

        for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); ++pt)
        {
            int opState = oam::ACTIVE;
            bool degraded;

            try
            {
                oam.getModuleStatus((*pt).DeviceName, opState, degraded);

                if (opState == oam::ACTIVE ||
                        opState == oam::DEGRADED)
                {
                    returnStatus = processManager.sendMsgProcMon( (*pt).DeviceName, msg, requestID, 30 );
                    upgradethreadStatus = returnStatus;

                    // NOTE(review): this only leaves the loop over the current
                    // module type; later module types are still processed and
                    // may overwrite upgradethreadStatus - confirm intended.
                    if ( returnStatus != API_SUCCESS)
                        break;
                }
            }
            catch (...)
            {
                // module status unavailable - deliberately ignored, module is skipped
            }
        }
    }

    pthread_detach (ThreadId);
    pthread_exit(0);
}
/******************************************************************************************
* @brief stopProcessTypes
*
* purpose: stop by process type
*
*
******************************************************************************************/
// Stop the ColumnStore processes one process type at a time, in a fixed
// order. Does nothing when DBRM_Controller/NumWorkers is "1" (single server
// install) or when that setting cannot be read.
//
// @param manualFlag  forwarded unchanged to every stopProcessType() call
void ProcessManager::stopProcessTypes(bool manualFlag)
{
    ProcessLog log;
    Configuration config;
    ProcessManager processManager(config, log);
    Oam oam;

    // skip if single server install, meaning only 1 worker node
    try
    {
        Config* sysConfig = Config::makeConfig();
        const bool singleWorker = ( sysConfig->getConfig("DBRM_Controller", "NumWorkers") == "1" );

        if ( singleWorker )
            return;
    }
    catch (...)
    {
        return;
    }

    log.writeLog(__LINE__, "stopProcessTypes Called");

    // Order matters: front-end first, then back-end, then dbrm.
    static const char* const stopOrder[] =
    {
        "mysql",
        "DMLProc",
        "DDLProc",
        "ExeMgr",
        "WriteEngineServer",
        "PrimProc",
        "DBRMControllerNode",
        "DBRMWorkerNode"
    };
    const unsigned int stopCount = sizeof(stopOrder) / sizeof(stopOrder[0]);

    for ( unsigned int idx = 0; idx < stopCount; ++idx )
        processManager.stopProcessType(stopOrder[idx], manualFlag);

    log.writeLog(__LINE__, "stopProcessTypes Completed");
}
/******************************************************************************************
* @brief unmountDBRoot
*
* purpose: unmount a dbroot
*
*
******************************************************************************************/
int ProcessManager::unmountDBRoot(std::string dbrootID)
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
//get pm assigned to that dbroot
int pmID;
oam.getDbrootPmConfig(atoi(dbrootID.c_str()), pmID);
string moduleName = "pm" + oam.itoa(pmID);
log.writeLog(__LINE__, "send unmountDBRoot to pm: " + dbrootID + "/" + moduleName, LOG_TYPE_DEBUG );
ByteStream msg;
msg << (ByteStream::byte) PROCUNMOUNT;
msg << dbrootID;
return sendMsgProcMon( moduleName, msg, PROCUNMOUNT );
}
/******************************************************************************************
* @brief mountDBRoot
*
* purpose: mount a dbroot
*
*
******************************************************************************************/
int ProcessManager::mountDBRoot(std::string dbrootID)
{
ProcessLog log;
Configuration config;
ProcessManager processManager(config, log);
Oam oam;
if (GlusterConfig == "y")
return oam::API_SUCCESS;
//get pm assigned to that dbroot
int pmID;
oam.getDbrootPmConfig(atoi(dbrootID.c_str()), pmID);
string moduleName = "pm" + oam.itoa(pmID);
log.writeLog(__LINE__, "send mountDBRoot to pm: " + dbrootID + "/" + moduleName, LOG_TYPE_DEBUG );
//send msg to ProcMon if not local module
if ( config.moduleName() == moduleName ) {
string cmd = "export LC_ALL=C;mount " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/mount.txt";
system(cmd.c_str());
if ( !rootUser) {
cmd = "sudo chown -R " + USER + ":" + USER + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /dev/null";
system(cmd.c_str());
}
ifstream in("/tmp/mount.txt");
in.seekg(0, std::ios::end);
int size = in.tellg();
if ( size != 0 )
{
if (!oam.checkLogStatus("/tmp/mount.txt", "already")) {
log.writeLog(__LINE__, "mount failed, dbroot: " + dbrootID);
return API_FAILURE;
}
}
}
else
{
ByteStream msg;
msg << (ByteStream::byte) PROCMOUNT;
msg << dbrootID;
return sendMsgProcMon( moduleName, msg, PROCMOUNT );
}
return oam::API_SUCCESS;
}
/******************************************************************************************
* @brief flushInodeCache
*
* purpose: flush the kernel caches by writing "3" to /proc/sys/vm/drop_caches (Linux only)
*
*
******************************************************************************************/
// Write "3\n" to /proc/sys/vm/drop_caches and log the outcome at debug
// level. Compiled to a no-op on non-Linux builds.
void ProcessManager::flushInodeCache()
{
    int fd;
    ByteStream reply;

#ifdef __linux__
    fd = open("/proc/sys/vm/drop_caches", O_WRONLY);

    if (fd < 0)
    {
        log.writeLog(__LINE__, "flushInodeCache failed to open file", LOG_TYPE_DEBUG);
        return;
    }

    // the write only counts as successful when both bytes were accepted
    if (write(fd, "3\n", 2) != 2)
    {
        log.writeLog(__LINE__, "flushInodeCache failed", LOG_TYPE_DEBUG);
    }
    else
    {
        log.writeLog(__LINE__, "flushInodeCache successful", LOG_TYPE_DEBUG);
    }

    close(fd);
#endif
}
/******************************************************************************************
* @brief setMySQLReplication
*
* purpose: set up or disable MySQL Replication on the master module and its slave modules
*
*
******************************************************************************************/
// Set up (enable == true) or disable (enable == false) MySQL replication.
//
// Steps, in order:
//   1. If distributeDB is set, send MASTERDIST to the master module - once
//      with target "all" when no module list was given, otherwise once per
//      listed module other than the master.
//   2. Send MASTERREP (or DISABLEREP) to the master module.
//   3. Send SLAVEREP (or DISABLEREP) to every slave module. When enabling,
//      the request also carries masterLogFile, masterLogPos and the MySQL
//      port, so both must be assigned by then.
//
// @param devicenetworklist  modules to treat as slaves; when empty, all modules
//                           from the system configuration are used (PMs are
//                           skipped on a non-combined install unless PMwithUM
//                           is set)
// @param masterModule       master module name; oam::UnassignedName means use
//                           the PrimaryUMModuleName setting, falling back to
//                           "um1" ("pm1" on a combined install)
// @param failover           not used by this function
// @param distributeDB       run step 1
// @param password           forwarded in the MASTERDIST request
// @param enable             true to set up replication, false to disable it
// @return API_SUCCESS, or API_FAILURE as soon as any step fails. Enabling
//         while the MySQLRep setting is "n" returns API_SUCCESS without doing
//         anything.
int ProcessManager::setMySQLReplication(oam::DeviceNetworkList devicenetworklist, std::string masterModule, bool failover, bool distributeDB, std::string password, bool enable)
{
    Oam oam;

    string MySQLRep;

    try
    {
        oam.getSystemConfig("MySQLRep", MySQLRep);
    }
    catch (...)
    {
        MySQLRep = "n";
    }

    // Only an enable request is short-circuited; a disable request proceeds
    // regardless of the setting.
    if ( MySQLRep == "n" && enable )
        return oam::API_SUCCESS;

    log.writeLog(__LINE__, "Setup MySQL Replication", LOG_TYPE_DEBUG);

    // mysql port number
    string MySQLPort;

    try
    {
        oam.getSystemConfig("MySQLPort", MySQLPort);
    }
    catch (...)
    {
        MySQLPort = "3306";
    }

    if ( MySQLPort.empty() )
        MySQLPort = "3306";

    //get master info
    if ( masterModule == oam::UnassignedName)
    {
        try
        {
            oam.getSystemConfig("PrimaryUMModuleName", masterModule);
        }
        catch (...)
        {
            masterModule = oam::UnassignedName;
        }

        if ( masterModule == oam::UnassignedName )
        {
            // use default setting
            masterModule = "um1";

            if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM )
                masterModule = "pm1";
        }
    }

    //
    // step 1: distribute the master DB
    //
    if ( distributeDB )
    {
        if ( devicenetworklist.size() == 0 )
        {
            //dist to all slaves
            ByteStream distMsg;
            ByteStream::byte distRequestID = oam::MASTERDIST;

            distMsg << distRequestID;
            distMsg << password;
            distMsg << "all";

            log.writeLog(__LINE__, "Distribute Master DB, master module=" + masterModule, LOG_TYPE_DEBUG);

            int distStatus = sendMsgProcMon( masterModule, distMsg, distRequestID, 60 );

            if ( distStatus != API_SUCCESS)
            {
                log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error distributing MySQL Replication Master DB", LOG_TYPE_ERROR);
                return API_FAILURE;
            }
        }
        else
        {
            DeviceNetworkList::iterator listPT = devicenetworklist.begin();

            for ( ; listPT != devicenetworklist.end() ; ++listPT)
            {
                string remoteModuleName = (*listPT).DeviceName;

                //skip master
                if ( remoteModuleName == masterModule )
                    continue;

                ByteStream distMsg;
                ByteStream::byte distRequestID = oam::MASTERDIST;

                distMsg << distRequestID;
                distMsg << password;
                distMsg << remoteModuleName;

                log.writeLog(__LINE__, "Distribute Master DB, master module=" + masterModule, LOG_TYPE_DEBUG);

                // the request always goes to the master; the slave is named in the payload
                int distStatus = sendMsgProcMon( masterModule, distMsg, distRequestID, 60 );

                if ( distStatus != API_SUCCESS)
                {
                    log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error distributing MySQL Replication Master DB", LOG_TYPE_ERROR);
                    return API_FAILURE;
                }
            }
        }
    }

    //
    // step 2: set up (or disable) the master
    //
    ByteStream msg;
    ByteStream::byte requestID = oam::MASTERREP;

    if ( !enable )
    {
        requestID = oam::DISABLEREP;
        log.writeLog(__LINE__, "Disable MySQL Replication, master module=" + masterModule, LOG_TYPE_DEBUG);
    }
    else
        log.writeLog(__LINE__, "Setup MySQL Replication, master module=" + masterModule, LOG_TYPE_DEBUG);

    msg << requestID;

    int returnStatus = sendMsgProcMon( masterModule, msg, requestID, 60 );

    if ( returnStatus != API_SUCCESS)
    {
        log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error getting MySQL Replication Master Information", LOG_TYPE_ERROR);
        return API_FAILURE;
    }

    //
    // step 3: set up (or disable) the slaves
    //
    // check if a list was provided, if not, do all modules
    if ( devicenetworklist.size() == 0 )
    {
        log.writeLog(__LINE__, "Setup MySQL Replication on all modules", LOG_TYPE_DEBUG);

        SystemModuleTypeConfig systemmoduletypeconfig;

        // On failure the configuration stays empty, so the loop below has
        // nothing to do; record why instead of failing silently.
        try
        {
            oam.getSystemConfig(systemmoduletypeconfig);
        }
        catch (exception& ex)
        {
            string error = ex.what();
            log.writeLog(__LINE__, "setMySQLReplication: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
        }

        for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++)
        {
            int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount;

            if ( moduleCount == 0)
                continue;

            DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin();

            for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); ++pt )
            {
                string remoteModuleName = (*pt).DeviceName;

                //skip master
                if ( remoteModuleName == masterModule )
                    continue;

                // don't do PMs unless PMwithUM flag is set
                if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM )
                {
                    string moduleType = remoteModuleName.substr(0, MAX_MODULE_TYPE_SIZE);

                    if ( moduleType == "pm" && PMwithUM == "n" )
                        continue;
                }

                ByteStream msg1;
                ByteStream::byte slaveRequestID = oam::SLAVEREP;

                if ( !enable )
                {
                    slaveRequestID = oam::DISABLEREP;
                    log.writeLog(__LINE__, "Disable MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG);
                }
                else
                    log.writeLog(__LINE__, "Setup MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG);

                msg1 << slaveRequestID;

                if ( enable )
                {
                    // a slave cannot be set up without the master's log coordinates
                    if ( masterLogFile == oam::UnassignedName ||
                            masterLogPos == oam::UnassignedName )
                    {
                        log.writeLog(__LINE__, "setMySQLReplication: ERROR: Unassigned masterLogFile or masterLogPos", LOG_TYPE_ERROR);
                        return API_FAILURE;
                    }

                    msg1 << masterLogFile;
                    msg1 << masterLogPos;
                    msg1 << MySQLPort;
                }

                returnStatus = sendMsgProcMon( remoteModuleName, msg1, slaveRequestID, 60 );

                if ( returnStatus != API_SUCCESS)
                {
                    log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error setting MySQL Replication Slave", LOG_TYPE_ERROR);
                    return API_FAILURE;
                }
            }
        }
    }
    else
    {
        DeviceNetworkList::iterator listPT = devicenetworklist.begin();

        for ( ; listPT != devicenetworklist.end() ; ++listPT)
        {
            string remoteModuleName = (*listPT).DeviceName;

            log.writeLog(__LINE__, "Setup SlavMySQL Replication on " + remoteModuleName, LOG_TYPE_DEBUG);

            //skip master
            if ( remoteModuleName == masterModule )
                continue;

            ByteStream msg1;
            ByteStream::byte slaveRequestID = oam::SLAVEREP;

            if ( !enable )
            {
                slaveRequestID = oam::DISABLEREP;
                log.writeLog(__LINE__, "Disable MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG);
            }
            else
                log.writeLog(__LINE__, "Setup MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG);

            msg1 << slaveRequestID;

            // The master's log coordinates are only part of a SLAVEREP
            // request, so only an enable request depends on them - same rule
            // as the all-modules branch. A DISABLEREP request carries no
            // payload and must not be blocked by unassigned coordinates.
            if ( enable )
            {
                if ( masterLogFile == oam::UnassignedName ||
                        masterLogPos == oam::UnassignedName )
                {
                    log.writeLog(__LINE__, "setMySQLReplication: ERROR: Unassigned masterLogFile or masterLogPos", LOG_TYPE_ERROR);
                    return API_FAILURE;
                }

                msg1 << masterLogFile;
                msg1 << masterLogPos;
                msg1 << MySQLPort;
            }

            returnStatus = sendMsgProcMon( remoteModuleName, msg1, slaveRequestID, 60 );

            if ( returnStatus != API_SUCCESS)
            {
                log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error setting MySQL Replication Slave", LOG_TYPE_ERROR);
                return API_FAILURE;
            }
        }
    }

    return oam::API_SUCCESS;
}
/******************************************************************************************
* @brief glusterAssign
*
* purpose: Gluster assign dbroot to a module
*
******************************************************************************************/
int ProcessManager::glusterAssign(std::string moduleName, std::string dbroot)
{
ByteStream msg;
ByteStream::byte requestID = PROCGLUSTERASSIGN;
msg << requestID;
msg << dbroot;
int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 );
if ( returnStatus == API_SUCCESS)
{
//log the success event
log.writeLog(__LINE__, "glusterAssign Success: " + moduleName + "/" + dbroot, LOG_TYPE_DEBUG);
}
else
{
//log the error event
log.writeLog(__LINE__, "glusterAssign FAILED: " + moduleName + "/" + dbroot, LOG_TYPE_ERROR);
}
return returnStatus;
}
/******************************************************************************************
* @brief glusterUnassign
*
* purpose: Gluster unassign dbroot from a module
*
******************************************************************************************/
int ProcessManager::glusterUnassign(std::string moduleName, std::string dbroot)
{
ByteStream msg;
ByteStream::byte requestID = PROCGLUSTERUNASSIGN;
msg << requestID;
msg << dbroot;
int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 );
if ( returnStatus == API_SUCCESS)
{
//log the success event
log.writeLog(__LINE__, "glusterUnassign Success: " + moduleName + "/" + dbroot, LOG_TYPE_DEBUG);
}
else
{
//log the error event
log.writeLog(__LINE__, "glusterUnassign FAILED: " + moduleName + "/" + dbroot, LOG_TYPE_ERROR);
}
return returnStatus;
}
} //end of namespace
// vim:ts=4 sw=4: