1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

MCOL-1610: modify so if moving a dbroot fails with gluster it is reassigned to original owner. Add logging around failure for mounting gluster volumes.

This commit is contained in:
Ben Thompson
2018-08-06 10:10:52 -05:00
parent d1969e4f6e
commit 515cc31d4f
4 changed files with 76 additions and 56 deletions

View File

@@ -5295,6 +5295,7 @@ namespace oam
dbrootList dbroot1;
dbroot1.push_back(*pt1);
bool returnDbRoot = false;
//send msg to unmount dbroot if module is not offline
int opState;
@@ -5306,7 +5307,6 @@ namespace oam
{}
if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) {
// bool unmountPass = true;
try
{
mountDBRoot(dbroot1, false);
@@ -5316,13 +5316,8 @@ namespace oam
writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR );
cout << endl << "ERROR: umountDBRoot api failure" << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE);
// unmountPass = false;
}
// if ( !unmountPass) {
// dbrootlist.erase(pt1);
// break;
// }
}
//check for amazon moving required
@@ -5340,38 +5335,79 @@ namespace oam
//if Gluster, do the assign command
if ( DataRedundancyConfig == "y")
{
try {
try
{
string errmsg;
int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg);
if ( ret != 0 )
if ( ret == 0 )
{
todbrootConfigList.push_back(*pt2);
residedbrootConfigList.erase(pt2);
}
else
{
cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE);
writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR );
returnDbRoot = true;
}
}
catch (exception& e)
{
cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE);
writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR );
returnDbRoot = true;
}
catch (...)
{
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE);
writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR );
returnDbRoot = true;
}
}
todbrootConfigList.push_back(*pt2);
residedbrootConfigList.erase(pt2);
if (returnDbRoot)
{
// something went wrong return it back to original owner
try
{
string errmsg;
writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR );
int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg);
if ( ret != 0 )
{
cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl;
writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR );
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
mountDBRoot(dbroot1);
//get updated Columnstore.xml distributed
distributeConfigFile("system");
return;
}
catch (exception& e)
{
cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl;
writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR );
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
catch (...)
{
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl;
writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR );
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
}
break;
}
}
}
//set the 2 pms dbroot config
try
{
@@ -5381,7 +5417,7 @@ namespace oam
{
writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID , LOG_TYPE_ERROR );
cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE);
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
try
@@ -5392,7 +5428,7 @@ namespace oam
{
writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID , LOG_TYPE_ERROR );
cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl;
exceptionControl("manualMovePmDbroot", API_FAILURE);
exceptionControl("manualMovePmDbroot", API_INVALID_STATE);
}
//send msg to mount dbroot
@@ -5980,7 +6016,7 @@ namespace oam
}
if (!found) {
writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG );
writeLog("No dbroots found in " + InstallDir + "/moveDbrootTransactionLog", LOG_TYPE_DEBUG );
cout << "No dbroots found in " << fileName << endl;
}
@@ -6518,32 +6554,7 @@ namespace oam
for( ; pt3 != dbrootlist.end() ; pt3++)
{
todbrootConfigList.push_back(*pt3);
/* if ( DataRedundancyConfig == "y")
{
try {
string errmsg;
int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(*pt3), toPM, errmsg);
if ( ret != 0 )
{
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID + ", error: " + errmsg << endl;
exceptionControl("assignPmDbrootConfig", API_FAILURE);
}
}
catch (exception& e)
{
cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl;
exceptionControl("assignPmDbrootConfig", API_FAILURE);
}
catch (...)
{
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl;
exceptionControl("assignPmDbrootConfig", API_FAILURE);
}
}
*/ }
}
try
{
@@ -6961,12 +6972,14 @@ namespace oam
{
cout << endl << "**** glusterctl API exception: " << e.what() << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl;
writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR );
exceptionControl("removeDbroot", API_FAILURE);
}
catch (...)
{
cout << endl << "**** glusterctl API exception: UNKNOWN" << endl;
cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl;
writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR );
exceptionControl("removeDbroot", API_FAILURE);
}
}

View File

@@ -1574,7 +1574,7 @@ void pingDeviceThread()
{
// no dbroots, fail module
log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING);
//Issue an alarm
aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);
@@ -1597,7 +1597,7 @@ void pingDeviceThread()
//set query system state ready
processManager.setQuerySystemState(true);
break;
goto break_case;
}
}
catch(...)
@@ -1619,25 +1619,24 @@ void pingDeviceThread()
if ( retry == 5 )
{
log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING);
log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING);
//Issue an alarm
aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET);
//set module to disable state
processManager.disableModule(moduleName, true);
// Need to do something here to verify data mounts before resuming
// Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
oam.dbrmctl("readonly");
log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG);
//clear count
moduleInfoList[moduleName] = 0;
processManager.setSystemState(oam::ACTIVE);
processManager.setSystemState(oam::DEGRADED);
//set query system state ready
processManager.setQuerySystemState(true);
@@ -2358,6 +2357,7 @@ void pingDeviceThread()
}
} //end of for loop
}
break_case:
// check and take action if LAN outage is flagged
if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN)

View File

@@ -6244,7 +6244,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ
string IPAddr = sysConfig->getConfig(msgPort, "IPAddr");
if ( IPAddr == oam::UnassignedIpAddr ) {
log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR);
log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR);
return oam::API_SUCCESS;
}
@@ -6253,7 +6253,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ
string cmd = cmdLine + IPAddr + cmdOption;
if ( system(cmd.c_str()) != 0) {
//ping failure
log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR);
log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR);
return oam::API_SUCCESS;
}
}

View File

@@ -6107,10 +6107,13 @@ int ProcessMonitor::glusterAssign(std::string dbrootID)
command = "sudo mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" +
dbrootID + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterAssign.txt 2>&1";
}
int ret = system(command.c_str());
if ( WEXITSTATUS(ret) != 0 )
{
log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
ifstream in("/tmp/glusterAssign.txt");
in.seekg(0, std::ios::end);
int size = in.tellg();
@@ -6151,9 +6154,13 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID)
{
command = "sudo umount -f " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterUnassign.txt 2>&1";
}
int ret = system(command.c_str());
if ( WEXITSTATUS(ret) != 0 )
{
log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
ifstream in("/tmp/glusterUnassign.txt");
in.seekg(0, std::ios::end);
int size = in.tellg();