From d454f84b82972210aa40db482c9995f0486701cb Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Mon, 10 Jun 2019 09:41:03 -0500 Subject: [PATCH] Cleaned up the hacks made so far. --- procmgr/processmanager.cpp | 8 +++---- procmon/main.cpp | 21 ++++++++++++----- procmon/processmonitor.cpp | 45 +++++++++++++----------------------- utils/cloudio/SocketPool.cpp | 8 ++++--- 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 623f1951b..3327e15d7 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -9170,13 +9170,13 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa oam.getSystemConfig("OIDBitmapFile", oidFile); - // StorageManager: Need to make these existence checks use an idbfilesystem op? + // StorageManager: Need to make these existence checks use an idbfilesystem op if we + // decide to put the BRM-managed files in cloud storage string currentDbrmFile; ifstream oldFile (currentFileName.c_str()); if (oldFile) { - // current file found, check for OIDBitmapFile ifstream mapFile (oidFile.c_str()); @@ -9293,7 +9293,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } // put oid file and current file in list - // StorageManager: no need to distribute these files + // StorageManager: no need to distribute these files if in cloud storage dbrmFiles.push_back(currentFileName); ifstream file1 (journalFileName.c_str()); @@ -9328,7 +9328,6 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa //remove any file of size 0 std::vector::iterator pt1 = dbrmFiles.begin(); - // StorageManager: ? for ( ; pt1 != dbrmFiles.end() ; pt1++) { string fileName = *pt1; @@ -9362,7 +9361,6 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa return oam::API_FAILURE; } - // StorageManager: ? pt1 = dbrmFiles.begin(); for ( ; pt1 != dbrmFiles.end() ; pt1++) { diff --git a/procmon/main.cpp b/procmon/main.cpp index adf03b5b4..e23c74e82 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -1014,6 +1014,9 @@ int main(int argc, char** argv) (*listPtr).DepModuleName, (*listPtr).LogFile, initType); + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. if (listPtr->ProcessName == "StorageManager") { log.writeLog(__LINE__, "StorageManager WTF? 1", LOG_TYPE_DEBUG); @@ -1071,12 +1074,15 @@ int main(int argc, char** argv) (*listPtr).DepModuleName, (*listPtr).LogFile, initType); - if (listPtr->ProcessName == "StorageManager") - { - log.writeLog(__LINE__, "StorageManager WTF? 2", LOG_TYPE_DEBUG); - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, listPtr->processID); - } + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. + if (listPtr->ProcessName == "StorageManager") + { + log.writeLog(__LINE__, "StorageManager WTF? 2", LOG_TYPE_DEBUG); + oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), + oam::ACTIVE, listPtr->processID); + } string restartStatus; @@ -1664,6 +1670,9 @@ static void chldHandleThread(MonitorConfig config) (*listPtr).DepModuleName, (*listPtr).LogFile, initStatus); + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. if (listPtr->ProcessName == "StorageManager") { log.writeLog(__LINE__, "StorageManager WTF? 3", LOG_TYPE_DEBUG); diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index fd501d8a8..fa77e8be4 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -575,11 +575,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO break; } - if (processName == "StorageManager") // storagemanager doesn't send its own response - { - //ackMsg << (uint8_t) ACK << (uint8_t) START << (uint8_t) API_SUCCESS; - //mq.write(ackMsg); - } ProcessConfig processconfig; ProcessStatus processstatus; @@ -663,6 +658,9 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO processconfig.LogFile, initType, actIndicator); + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. if (processconfig.ProcessName == "StorageManager") { log.writeLog(__LINE__, "StorageManager WTF? 6", LOG_TYPE_DEBUG); @@ -714,11 +712,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO break; } - if (processName == "StorageManager") // storagemanager doesn't send its own response - { - //ackMsg << (uint8_t) ACK << (uint8_t) RESTART << (uint8_t) API_SUCCESS; - // mq.write(ackMsg); - } processList::iterator listPtr; processList* aPtr = config.monitoredListPtr(); @@ -769,6 +762,9 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO (*listPtr).DepModuleName, (*listPtr).LogFile, initType); + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. if (listPtr->ProcessName == "StorageManager") { log.writeLog(__LINE__, "StorageManager WTF? 7", LOG_TYPE_DEBUG); @@ -1120,11 +1116,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO break; } - if (processName == "StorageManager") // storagemanager doesn't send its own status - { - //ackMsg << (uint8_t) ACK << (uint8_t) STARTALL << (uint8_t) API_SUCCESS; - //mq.write(ackMsg); - } if ( config.moduleType() == "pm" ) { @@ -1219,6 +1210,9 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO (*listPtr).DepModuleName, (*listPtr).LogFile, initType); + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. if (listPtr->ProcessName == "StorageManager") { log.writeLog(__LINE__, "StorageManager WTF? 4", LOG_TYPE_DEBUG); @@ -1291,6 +1285,9 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO (*listPtr).DepModuleName, (*listPtr).LogFile, initType); + + // StorageManager doesn't send the "I'm online" msg to Proc*. + // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. if (listPtr->ProcessName == "StorageManager") { log.writeLog(__LINE__, "StorageManager WTF? 5", LOG_TYPE_DEBUG); @@ -2269,8 +2266,8 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, SystemProcessStatus systemprocessstatus; ProcessStatus processstatus; - log.writeLog(__LINE__, "STARTING Process: " + processName, LOG_TYPE_CRITICAL); //, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Process location: " + processLocation, LOG_TYPE_CRITICAL); //, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "STARTING Process: " + processName, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Process location: " + processLocation, LOG_TYPE_DEBUG); //check process location if (access(processLocation.c_str(), X_OK) != 0) @@ -2697,8 +2694,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, updateProcessInfo(processName, initType, 0); //sleep, give time for INIT state to be update, prevent race condition with ACTIVE - if (processName != "StorageManager") - sleep(1); + sleep(1); //check and setup for logfile time_t now; @@ -2853,16 +2849,9 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, } //give time to get INIT status updated in shared memory - if (processName != "StorageManager") - sleep(1); + sleep(1); execv(processLocation.c_str(), argList); - if (processName == "StorageManager") - { - char buf[80]; - int l_errno = errno; - log.writeLog(__LINE__, "exec'ing StorageManager failed, got " + string(strerror_r(l_errno, buf, 80)), LOG_TYPE_DEBUG); - } //record the process information into processList config.buildList(processModuleType, processName, processLocation, arg_list, launchID, newProcessID, FAILED, BootLaunch, RunType, @@ -4825,7 +4814,6 @@ int ProcessMonitor::runHDFSTest() ifstream File (DataFilePlugin.c_str()); -#if 0 // for storagemanager if (!File) { log.writeLog(__LINE__, "Error: Hadoop Datafile Plugin File (" + DataFilePlugin + ") doesn't exist", LOG_TYPE_CRITICAL); @@ -4859,7 +4847,6 @@ int ProcessMonitor::runHDFSTest() fail = true; } } -#endif if (!fail) { diff --git a/utils/cloudio/SocketPool.cpp b/utils/cloudio/SocketPool.cpp index df7fab540..e27757813 100644 --- a/utils/cloudio/SocketPool.cpp +++ b/utils/cloudio/SocketPool.cpp @@ -77,7 +77,9 @@ SocketPool::~SocketPool() #define sm_check_error \ if (err < 0) \ { \ - cout << "SP: got an error on the socket" << endl; \ + char _smbuf[80]; \ + int l_errno = errno; \ + log(logging::LOG_TYPE_ERROR, string("SocketPool: got a network error: ") + strerror_r(l_errno, _smbuf, 80)); \ remoteClosed(sock); \ return -1; \ } @@ -95,7 +97,7 @@ int SocketPool::send_recv(messageqcpp::ByteStream &in, messageqcpp::ByteStream * sock = getSocket(); if (sock < 0) { - log(logging::LOG_TYPE_ERROR, "SocketPool::send_recv(): failed to get a connection, retrying in 5 sec..."); + log(logging::LOG_TYPE_ERROR, "SocketPool::send_recv(): retrying in 5 sec..."); sleep(5); } } @@ -214,7 +216,7 @@ int SocketPool::getSocket() char buf[80]; os << "SocketPool::getSocket() failed to connect; got '" << strerror_r(saved_errno, buf, 80); cout << os.str() << endl; - log(logging::LOG_TYPE_CRITICAL, os.str()); + log(logging::LOG_TYPE_ERROR, os.str()); errno = saved_errno; } return clientSocket;