1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

mcol-833 - merge code from 1.0 for missing file fix

This commit is contained in:
david hill
2017-07-26 15:03:52 -05:00
parent 7691f05191
commit 26ac4aa31c
4 changed files with 89 additions and 59 deletions

View File

@ -336,7 +336,7 @@ void DistributedEngineComm::Listen(boost::shared_ptr<MessageQueueClient> client,
Error:
// @bug 488 - error condition! push 0 length bs to messagequeuemap and
// eventually let jobstep error out.
mutex::scoped_lock lk(fMlock);
/* mutex::scoped_lock lk(fMlock);
//cout << "WARNING: DEC READ 0 LENGTH BS FROM " << client->otherEnd()<< endl;
MessageQueueMap::iterator map_tok;
@ -370,7 +370,7 @@ Error:
fPmConnections.swap(tempConns);
pmCount = (pmCount == 0 ? 0 : pmCount - 1);
//cout << "PMCOUNT=" << pmCount << endl;
*/
// send alarm & log it
ALARMManager alarmMgr;
string alarmItem = client->addr2String();
@ -861,7 +861,7 @@ int DistributedEngineComm::writeToClient(size_t index, const ByteStream& bs, uin
{
// @bug 488. error out under such condition instead of re-trying other connection,
// by pushing 0 size bytestream to messagequeue and throw excpetion
SBS sbs;
/* SBS sbs;
lk.lock();
//cout << "WARNING: DEC WRITE BROKEN PIPE. PMS index = " << index << endl;
MessageQueueMap::iterator map_tok;
@ -894,7 +894,7 @@ int DistributedEngineComm::writeToClient(size_t index, const ByteStream& bs, uin
fPmConnections.swap(tempConns);
pmCount = (pmCount == 0 ? 0 : pmCount - 1);
}
*/
// send alarm
ALARMManager alarmMgr;
string alarmItem("UNKNOWN");

View File

@ -58,6 +58,7 @@ bool HDFS = false;
string localHostName;
string PMwithUM = "n";
string MySQLRep = "n";
string DBRootStorageType = "internal";
// pushing the ACTIVE_ALARMS_FILE to all nodes every 10 seconds.
const int ACTIVE_ALARMS_PUSHING_INTERVAL = 10;
@ -1365,6 +1366,9 @@ void pingDeviceThread()
break;
//set query system state not ready
BRM::DBRM dbrm;
dbrm.setSystemQueryReady(false);
processManager.setQuerySystemState(false);
processManager.setSystemState(oam::BUSY_INIT);
@ -1380,19 +1384,19 @@ void pingDeviceThread()
//send notification
oam.sendDeviceNotification(config.moduleName(), MODULE_UP);
//set module to enable state
processManager.enableModule(moduleName, oam::AUTO_OFFLINE);
int status;
// if pm, move dbroots back to pm
if ( ( moduleName.find("pm") == 0 && !amazon ) ||
// if shared pm, move dbroots back to pm
if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) {
//restart to get the versionbuffer files closed so it can be unmounted
processManager.restartProcessType("WriteEngineServer", moduleName);
//set module to enable state
processManager.enableModule(moduleName, oam::AUTO_OFFLINE);
downActiveOAMModule = false;
int retry;
for ( retry = 0 ; retry < 5 ; retry++ )
@ -1484,6 +1488,9 @@ void pingDeviceThread()
break;
}
}
else
//set module to enable state
processManager.enableModule(moduleName, oam::AUTO_OFFLINE);
//restart module processes
int retry = 0;
@ -1584,14 +1591,6 @@ void pingDeviceThread()
continue;
}
//call dbrm control, need to resume before start so the getdbrmfiles halt doesn't hang
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
// next, startmodule
status = processManager.startModule(moduleName, oam::FORCEFUL, oam::AUTO_OFFLINE);
if ( status == oam::API_SUCCESS )
@ -1606,6 +1605,14 @@ void pingDeviceThread()
if ( retry < ModuleProcMonWaitCount )
{ // module successfully started
//call dbrm control, need to resume before start so the getdbrmfiles halt doesn't hang
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
//distribute config file
processManager.distributeConfigFile("system");
sleep(1);
@ -1647,6 +1654,9 @@ void pingDeviceThread()
processManager.restartProcessType("DMLProc", moduleName);
}
//enable query stats
dbrm.setSystemQueryReady(true);
//set query system state ready
processManager.setQuerySystemState(true);
@ -1664,8 +1674,9 @@ void pingDeviceThread()
aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);
// if pm, move dbroots back to pm
if ( ( moduleName.find("pm") == 0 && !amazon ) ||
( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ) {
if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) {
//move dbroots to other modules
try {
log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
@ -1703,6 +1714,9 @@ void pingDeviceThread()
else
processManager.setSystemState(oam::ACTIVE);
//enable query stats
dbrm.setSystemQueryReady(true);
//set query system state ready
processManager.setQuerySystemState(true);
@ -1741,8 +1755,13 @@ void pingDeviceThread()
log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL);
//set query system state not ready
BRM::DBRM dbrm;
dbrm.setSystemQueryReady(false);
processManager.setQuerySystemState(false);
processManager.setSystemState(oam::BUSY_INIT);
processManager.reinitProcessType("cpimport");
// halt the dbrm
@ -1771,25 +1790,24 @@ void pingDeviceThread()
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// if pm, move dbroots to other pms
if ( !amazon ||
( amazon && AmazonPMFailover == "y") ) {
if( moduleName.find("pm") == 0 ) {
try {
log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
oam.autoMovePmDbroot(moduleName);
log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG);
//distribute config file
processManager.distributeConfigFile("system");
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR);
}
if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) {
try {
log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
oam.autoMovePmDbroot(moduleName);
log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG);
//distribute config file
processManager.distributeConfigFile("system");
}
catch (exception& ex)
{
string error = ex.what();
log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG);
}
catch(...)
{
log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
@ -1968,7 +1986,9 @@ void pingDeviceThread()
processManager.removeModule(devicenetworklist, false);
// if pm, move dbroots to other pms
if( moduleName.find("pm") == 0 ) {
if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ||
( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) {
try {
log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG);
oam.autoMovePmDbroot(moduleName);
@ -1990,6 +2010,9 @@ void pingDeviceThread()
//set recycle process
processManager.recycleProcess(moduleName);
//enable query stats
dbrm.setSystemQueryReady(true);
//set query system state ready
processManager.setQuerySystemState(true);
@ -2004,6 +2027,9 @@ void pingDeviceThread()
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
//enable query stats
dbrm.setSystemQueryReady(true);
//set query system state ready
processManager.setQuerySystemState(true);
}
@ -2017,6 +2043,9 @@ void pingDeviceThread()
//set recycle process
processManager.recycleProcess(moduleName);
//enable query stats
dbrm.setSystemQueryReady(true);
//set query system state ready
processManager.setQuerySystemState(true);

View File

@ -72,6 +72,7 @@ bool startsystemthreadRunning = false;
string gdownActiveOAMModule;
vector<string> downModuleList;
bool startFailOver = false;
extern string DBRootStorageType;
string masterLogFile = oam::UnassignedName;
string masterLogPos = oam::UnassignedName;
@ -2791,6 +2792,16 @@ void processMSG(messageqcpp::IOSocket* cfIos)
log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted on " + moduleName + "/" + processName);
//set query system states not ready
BRM::DBRM dbrm;
dbrm.setSystemQueryReady(false);
processManager.setQuerySystemState(false);
processManager.setSystemState(oam::BUSY_INIT);
processManager.reinitProcessType("cpimport");
//request reinit after Process is active
for ( int i = 0; i < 600 ; i++ ) {
try {
@ -2916,6 +2927,13 @@ void processMSG(messageqcpp::IOSocket* cfIos)
break;
}
}
//enable query stats
dbrm.setSystemQueryReady(true);
processManager.setQuerySystemState(true);
processManager.setSystemState(oam::ACTIVE);
}
break;
@ -8525,14 +8543,6 @@ int ProcessManager::switchParentOAMModule(std::string newActiveModuleName)
log.writeLog(__LINE__, "switchParentOAMModule Function Started", LOG_TYPE_DEBUG);
string DBRootStorageType = "internal";
{
try{
oam.getSystemConfig("DBRootStorageType", DBRootStorageType);
}
catch(...) {}
}
if ( DBRootStorageType == "internal" && GlusterConfig == "n") {
log.writeLog(__LINE__, "ERROR: DBRootStorageType = internal", LOG_TYPE_ERROR);
pthread_mutex_unlock(&THREAD_LOCK);
@ -8818,15 +8828,6 @@ int ProcessManager::OAMParentModuleChange()
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
}
// dbroot storage type, do different failover if internal
string DBRootStorageType = "internal";
{
try{
oam.getSystemConfig("DBRootStorageType", DBRootStorageType);
}
catch(...) {}
}
string cmdLine = "ping ";
string cmdOption = " -c 1 -w 5 >> /dev/null";
string cmd;

View File

@ -595,7 +595,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
log.writeLog(__LINE__, "START: process already active " + processName);
//Inform Process Manager that Process restart
processRestarted(processName);
//processRestarted(processName);
ackMsg << (ByteStream::byte) ACK;
ackMsg << (ByteStream::byte) START;
@ -694,7 +694,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
}
//Inform Process Manager that Process restart
processRestarted(processName);
//processRestarted(processName);
ackMsg << (ByteStream::byte) ACK;
ackMsg << (ByteStream::byte) RESTART;