From 1b43cc8d4ee18819972e0a69e04c485cbe12bfb9 Mon Sep 17 00:00:00 2001 From: david hill Date: Thu, 20 Jul 2017 17:27:23 -0500 Subject: [PATCH] MCOL-814 - amazon changes for failover and query testing --- oam/install_scripts/columnstore | 2 +- oamapps/postConfigure/postConfigure.cpp | 2 +- procmgr/main.cpp | 48 +++++++++++++++++++------ procmgr/processmanager.cpp | 7 ---- procmon/processmonitor.cpp | 4 +-- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/oam/install_scripts/columnstore b/oam/install_scripts/columnstore index f4fc48157..8978afd61 100644 --- a/oam/install_scripts/columnstore +++ b/oam/install_scripts/columnstore @@ -90,7 +90,7 @@ start() { exit 1 fi fi - #checkInstallSetup + checkInstallSetup CoreFileFlag=`$InstallDir/bin/getConfig -c $InstallDir/etc/Columnstore.xml Installation CoreFileFlag` if [ $CoreFileFlag = "y" ]; then diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 590bbafd9..b858f94b3 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -4886,7 +4886,7 @@ bool copyFstab(string moduleName) if ( rootUser) cmd = "/bin/cp -f /etc/fstab " + installDir + "/local/etc/" + moduleName + "/. > /dev/null 2>&1"; else - cmd = "/sudo bin/cp -f /etc/fstab " + installDir + "/local/etc/" + moduleName + "/. > /dev/null 2>&1"; + cmd = "sudo /bin/cp -f /etc/fstab " + installDir + "/local/etc/" + moduleName + "/. 
> /dev/null 2>&1"; system(cmd.c_str()); diff --git a/procmgr/main.cpp b/procmgr/main.cpp index fe00412ba..8281068d5 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1261,6 +1261,9 @@ void pingDeviceThread() break; //set query system state not ready + BRM::DBRM dbrm; + dbrm.setSystemQueryReady(false); + processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -1276,9 +1279,6 @@ void pingDeviceThread() //send notification oam.sendDeviceNotification(config.moduleName(), MODULE_UP); - //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE); - int status; // if shared pm, move dbroots back to pm @@ -1289,6 +1289,9 @@ void pingDeviceThread() //restart to get the versionbuffer files closed so it can be unmounted processManager.restartProcessType("WriteEngineServer", moduleName); + //set module to enable state + processManager.enableModule(moduleName, oam::AUTO_OFFLINE); + downActiveOAMModule = false; int retry; for ( retry = 0 ; retry < 5 ; retry++ ) @@ -1380,6 +1383,9 @@ void pingDeviceThread() break; } } + else + //set module to enable state + processManager.enableModule(moduleName, oam::AUTO_OFFLINE); //restart module processes int retry = 0; @@ -1480,14 +1486,6 @@ void pingDeviceThread() continue; } - //call dbrm control, need to resume before start so the getdbrmfiles halt doesn't hang - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - // next, startmodule status = processManager.startModule(moduleName, oam::FORCEFUL, oam::AUTO_OFFLINE); if ( status == oam::API_SUCCESS ) @@ -1502,6 +1500,14 @@ void pingDeviceThread() if ( retry < ModuleProcMonWaitCount ) { // module successfully started + //call dbrm control, need to resume before start so the getdbrmfiles halt doesn't hang + oam.dbrmctl("reload"); + log.writeLog(__LINE__, "'dbrmctl 
reload' done", LOG_TYPE_DEBUG); + + // resume the dbrm + oam.dbrmctl("resume"); + log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + //distribute config file processManager.distributeConfigFile("system"); sleep(1); @@ -1543,6 +1549,9 @@ void pingDeviceThread() processManager.restartProcessType("DMLProc", moduleName); } + //enable query stats + dbrm.setSystemQueryReady(true); + //set query system state ready processManager.setQuerySystemState(true); @@ -1600,6 +1609,9 @@ void pingDeviceThread() else processManager.setSystemState(oam::ACTIVE); + //enable query stats + dbrm.setSystemQueryReady(true); + //set query system state ready processManager.setQuerySystemState(true); @@ -1638,8 +1650,13 @@ void pingDeviceThread() log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready + BRM::DBRM dbrm; + dbrm.setSystemQueryReady(false); + processManager.setQuerySystemState(false); + processManager.setSystemState(oam::BUSY_INIT); + processManager.reinitProcessType("cpimport"); // halt the dbrm @@ -1888,6 +1905,9 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); + //enable query stats + dbrm.setSystemQueryReady(true); + //set query system state ready processManager.setQuerySystemState(true); @@ -1902,6 +1922,9 @@ void pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + //enable query stats + dbrm.setSystemQueryReady(true); + //set query system state ready processManager.setQuerySystemState(true); } @@ -1915,6 +1938,9 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); + //enable query stats + dbrm.setSystemQueryReady(true); + //set query system state ready processManager.setQuerySystemState(true); diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index d0f4ff385..b83410e16 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ 
-2945,15 +2945,8 @@ void processMSG(messageqcpp::IOSocket* cfIos) msg >> moduleName; - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); - int ret = processManager.getDBRMData(fIos, moduleName); - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - if ( ret == oam::API_SUCCESS ) log.writeLog(__LINE__, "Get DBRM Data Files Completed"); else diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 8da3a6734..e0bcf9a66 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -594,7 +594,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "START: process already active " + processName); //Inform Process Manager that Process restart - processRestarted(processName); + //processRestarted(processName); ackMsg << (ByteStream::byte) ACK; ackMsg << (ByteStream::byte) START; @@ -693,7 +693,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO } //Inform Process Manager that Process restart - processRestarted(processName); + //processRestarted(processName); ackMsg << (ByteStream::byte) ACK; ackMsg << (ByteStream::byte) RESTART;