1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

MCOL-1610: If moving a dbroot fails with gluster, reassign it to its original owner. Add logging around failures when mounting gluster volumes.

This commit is contained in:
Ben Thompson
2018-08-06 10:10:52 -05:00
parent d1969e4f6e
commit 515cc31d4f
4 changed files with 76 additions and 56 deletions

View File

@@ -5295,6 +5295,7 @@ namespace oam
dbrootList dbroot1; dbrootList dbroot1;
dbroot1.push_back(*pt1); dbroot1.push_back(*pt1);
bool returnDbRoot = false;
//send msg to unmount dbroot if module is not offline //send msg to unmount dbroot if module is not offline
int opState; int opState;
@@ -5306,7 +5307,6 @@ namespace oam
{} {}
if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) { if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) {
// bool unmountPass = true;
try try
{ {
mountDBRoot(dbroot1, false); mountDBRoot(dbroot1, false);
@@ -5316,13 +5316,8 @@ namespace oam
writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR );
cout << endl << "ERROR: umountDBRoot api failure" << endl; cout << endl << "ERROR: umountDBRoot api failure" << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE); exceptionControl("manualMovePmDbroot", API_FAILURE);
// unmountPass = false;
} }
// if ( !unmountPass) {
// dbrootlist.erase(pt1);
// break;
// }
} }
//check for amazon moving required //check for amazon moving required
@@ -5340,38 +5335,79 @@ namespace oam
//if Gluster, do the assign command //if Gluster, do the assign command
if ( DataRedundancyConfig == "y") if ( DataRedundancyConfig == "y")
{ {
try { try
{
string errmsg; string errmsg;
int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg); int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg);
if ( ret != 0 ) if ( ret == 0 )
{
todbrootConfigList.push_back(*pt2);
residedbrootConfigList.erase(pt2);
}
else
{ {
cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE); writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR );
returnDbRoot = true;
} }
} }
catch (exception& e) catch (exception& e)
{ {
cout << endl << "**** glusterctl API exception: " << e.what() << endl; cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE); writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR );
returnDbRoot = true;
} }
catch (...) catch (...)
{ {
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE); writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR );
returnDbRoot = true;
} }
} }
todbrootConfigList.push_back(*pt2); if (returnDbRoot)
{
residedbrootConfigList.erase(pt2); // something went wrong return it back to original owner
try
{
string errmsg;
writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR );
int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg);
if ( ret != 0 )
{
cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl;
writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR );
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
mountDBRoot(dbroot1);
//get updated Columnstore.xml distributed
distributeConfigFile("system");
return;
}
catch (exception& e)
{
cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl;
writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR );
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
catch (...)
{
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl;
writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR );
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
}
break; break;
} }
} }
} }
//set the 2 pms dbroot config //set the 2 pms dbroot config
try try
{ {
@@ -5381,7 +5417,7 @@ namespace oam
{ {
writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID , LOG_TYPE_ERROR ); writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID , LOG_TYPE_ERROR );
cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl; cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE); exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
} }
try try
@@ -5392,7 +5428,7 @@ namespace oam
{ {
writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID , LOG_TYPE_ERROR ); writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID , LOG_TYPE_ERROR );
cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl; cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE); exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
} }
//send msg to mount dbroot //send msg to mount dbroot
@@ -5980,7 +6016,7 @@ namespace oam
} }
if (!found) { if (!found) {
writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); writeLog("No dbroots found in " + InstallDir + "/moveDbrootTransactionLog", LOG_TYPE_DEBUG );
cout << "No dbroots found in " << fileName << endl; cout << "No dbroots found in " << fileName << endl;
} }
@@ -6518,32 +6554,7 @@ namespace oam
for( ; pt3 != dbrootlist.end() ; pt3++) for( ; pt3 != dbrootlist.end() ; pt3++)
{ {
todbrootConfigList.push_back(*pt3); todbrootConfigList.push_back(*pt3);
}
/* if ( DataRedundancyConfig == "y")
{
try {
string errmsg;
int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(*pt3), toPM, errmsg);
if ( ret != 0 )
{
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID + ", error: " + errmsg << endl;
exceptionControl("assignPmDbrootConfig", API_FAILURE);
}
}
catch (exception& e)
{
cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl;
exceptionControl("assignPmDbrootConfig", API_FAILURE);
}
catch (...)
{
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl;
exceptionControl("assignPmDbrootConfig", API_FAILURE);
}
}
*/ }
try try
{ {
@@ -6961,12 +6972,14 @@ namespace oam
{ {
cout << endl << "**** glusterctl API exception: " << e.what() << endl; cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl;
writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR );
exceptionControl("removeDbroot", API_FAILURE); exceptionControl("removeDbroot", API_FAILURE);
} }
catch (...) catch (...)
{ {
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl;
writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR );
exceptionControl("removeDbroot", API_FAILURE); exceptionControl("removeDbroot", API_FAILURE);
} }
} }

View File

@@ -1574,7 +1574,7 @@ void pingDeviceThread()
{ {
// no dbroots, fail module // no dbroots, fail module
log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING); log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING);
//Issue an alarm //Issue an alarm
aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);
@@ -1597,7 +1597,7 @@ void pingDeviceThread()
//set query system state ready //set query system state ready
processManager.setQuerySystemState(true); processManager.setQuerySystemState(true);
break; goto break_case;
} }
} }
catch(...) catch(...)
@@ -1619,25 +1619,24 @@ void pingDeviceThread()
if ( retry == 5 ) if ( retry == 5 )
{ {
log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING); log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING);
log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING);
//Issue an alarm //Issue an alarm
aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);
//set module to disable state //set module to disable state
processManager.disableModule(moduleName, true); processManager.disableModule(moduleName, true);
// Need to do something here to verify data mounts before resuming
// Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted
//call dbrm control //call dbrm control
oam.dbrmctl("reload"); oam.dbrmctl("readonly");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
//clear count //clear count
moduleInfoList[moduleName] = 0; moduleInfoList[moduleName] = 0;
processManager.setSystemState(oam::ACTIVE); processManager.setSystemState(oam::DEGRADED);
//set query system state ready //set query system state ready
processManager.setQuerySystemState(true); processManager.setQuerySystemState(true);
@@ -2358,6 +2357,7 @@ void pingDeviceThread()
} }
} //end of for loop } //end of for loop
} }
break_case:
// check and take action if LAN outage is flagged // check and take action if LAN outage is flagged
if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN) if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN)

View File

@@ -6244,7 +6244,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ
string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); string IPAddr = sysConfig->getConfig(msgPort, "IPAddr");
if ( IPAddr == oam::UnassignedIpAddr ) { if ( IPAddr == oam::UnassignedIpAddr ) {
log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR);
return oam::API_SUCCESS; return oam::API_SUCCESS;
} }
@@ -6253,7 +6253,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ
string cmd = cmdLine + IPAddr + cmdOption; string cmd = cmdLine + IPAddr + cmdOption;
if ( system(cmd.c_str()) != 0) { if ( system(cmd.c_str()) != 0) {
//ping failure //ping failure
log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR);
return oam::API_SUCCESS; return oam::API_SUCCESS;
} }
} }

View File

@@ -6107,10 +6107,13 @@ int ProcessMonitor::glusterAssign(std::string dbrootID)
command = "sudo mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" + command = "sudo mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" +
dbrootID + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterAssign.txt 2>&1"; dbrootID + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterAssign.txt 2>&1";
} }
int ret = system(command.c_str()); int ret = system(command.c_str());
if ( WEXITSTATUS(ret) != 0 ) if ( WEXITSTATUS(ret) != 0 )
{ {
log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
ifstream in("/tmp/glusterAssign.txt"); ifstream in("/tmp/glusterAssign.txt");
in.seekg(0, std::ios::end); in.seekg(0, std::ios::end);
int size = in.tellg(); int size = in.tellg();
@@ -6151,9 +6154,13 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID)
{ {
command = "sudo umount -f " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterUnassign.txt 2>&1"; command = "sudo umount -f " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterUnassign.txt 2>&1";
} }
int ret = system(command.c_str()); int ret = system(command.c_str());
if ( WEXITSTATUS(ret) != 0 ) if ( WEXITSTATUS(ret) != 0 )
{ {
log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
ifstream in("/tmp/glusterUnassign.txt"); ifstream in("/tmp/glusterUnassign.txt");
in.seekg(0, std::ios::end); in.seekg(0, std::ios::end);
int size = in.tellg(); int size = in.tellg();