1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-259: add retry logic to the OamCache reload, and treat the DEGRADED state as still valid for a PM.

This commit is contained in:
David Hall
2016-08-23 16:50:56 -05:00
parent 7a47ad9f1d
commit 482047679a
6 changed files with 61 additions and 41 deletions

View File

@ -367,12 +367,13 @@ void pDictionaryScan::sendPrimitiveMessages()
if (dbRootConnectionMap->find(dbroot) == dbRootConnectionMap->end())
{
// MCOL-259 force a reload of the xml. This usually fixes it.
std::cout << "dictionary forcing reload of columnstore.xml for dbRootConnectionMap" << std::endl;
Logger log;
log.logMessage(logging::LOG_TYPE_DEBUG, "dictionary forcing reload of columnstore.xml for dbRootConnectionMap");
oamCache->forceReload();
dbRootConnectionMap = oamCache->getDBRootToConnectionMap();
if (dbRootConnectionMap->find(dbroot) == dbRootConnectionMap->end())
{
std::cout << "dictionary still not in dbRootConnectionMap" << std::endl;
log.logMessage(logging::LOG_TYPE_DEBUG, "dictionary still not in dbRootConnectionMap");
throw IDBExcept(ERR_DATA_OFFLINE);
}
}

View File

@ -1623,12 +1623,13 @@ void TupleBPS::makeJobs(vector<Job> *jobs)
if (dbRootConnectionMap->find(scannedExtents[i].dbRoot) == dbRootConnectionMap->end())
{
// MCOL-259 force a reload of the xml. This usually fixes it.
std::cout << "forcing reload of columnstore.xml for dbRootConnectionMap" << std::endl;
Logger log;
log.logMessage(logging::LOG_TYPE_WARNING, "forcing reload of columnstore.xml for dbRootConnectionMap");
oamCache->forceReload();
dbRootConnectionMap = oamCache->getDBRootToConnectionMap();
if (dbRootConnectionMap->find(scannedExtents[i].dbRoot) == dbRootConnectionMap->end())
{
std::cout << "still not in dbRootConnectionMap" << std::endl;
log.logMessage(logging::LOG_TYPE_WARNING, "dbroot still not in dbRootConnectionMap");
throw IDBExcept(ERR_DATA_OFFLINE);
}
}

View File

@ -1618,6 +1618,7 @@ namespace oam
{}
// no match found
state = oam::UNEQUIP;
exceptionControl("getModuleStatus", API_INVALID_PARAMETER);
}

View File

@ -273,7 +273,8 @@ namespace oam
STANDBY_INIT, // 18 = Standby init
BUSY_INIT, // 19 = Busy init
ROLLBACK_INIT, // 20 = Rollback during DML init
STATE_MAX // 21 = Max value
PID_UPDATE, // 21 = Assigning the pid
STATE_MAX // 22 = Max value
};
/** @brief String State
@ -2390,17 +2391,17 @@ namespace oam
*/
EXPORT void dbrmctl(std::string command);
/** @brief Wait for system to close transactions
*
* When a Shutdown, stop, restart or suspend operation is
* requested but there are active transactions of some sort,
* We wait for all transactions to close before performing
* the action.
*/
EXPORT bool waitForSystem(PROC_MGT_MSG_REQUEST request, messageqcpp::IOSocket& ios, messageqcpp::ByteStream& stillWorkingMsg);
/** @brief Wait for system to close transactions
*
* When a Shutdown, stop, restart or suspend operation is
* requested but there are active transactions of some sort,
* We wait for all transactions to close before performing
* the action.
*/
EXPORT bool waitForSystem(PROC_MGT_MSG_REQUEST request, messageqcpp::IOSocket& ios, messageqcpp::ByteStream& stillWorkingMsg);
void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false);
void mountDBRoot(dbrootList dbrootConfigList, bool mount = true);
void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false);
void mountDBRoot(dbrootList dbrootConfigList, bool mount = true);
/**
*@brief gluster control
@ -2431,19 +2432,24 @@ namespace oam
**/
EXPORT bool disableMySQLRep();
/** @brief check Gluster Log after a Gluster control call
*/
EXPORT int checkGlusterLog(std::string logFile, std::string& errmsg);
/** @brief check Gluster Log after a Gluster control call
*/
EXPORT int checkGlusterLog(std::string logFile, std::string& errmsg);
/** @brief check and get mysql user password
*/
EXPORT std::string getMySQLPassword(bool bypassConfig = false);
/** @brief check and get mysql user password
*/
EXPORT std::string getMySQLPassword(bool bypassConfig = false);
/** @brief update fstab with dbroot mounts
*/
EXPORT std::string updateFstab(std::string device, std::string dbrootID);
/** @brief update fstab with dbroot mounts
*/
EXPORT std::string updateFstab(std::string device, std::string dbrootID);
private:
/**
* @brief Write the message to the log
*/
void writeLog(const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO);
private:
int sendMsgToProcMgr3(messageqcpp::ByteStream::byte requestType, snmpmanager::AlarmList& alarmlist, const std::string date);
@ -2476,11 +2482,6 @@ namespace oam
*/
void sendStatusUpdate(messageqcpp::ByteStream obs, messageqcpp::ByteStream::byte returnRequestType);
/**
* @brief Write the message to the log
*/
void writeLog(const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO);
std::string CalpontConfigFile;
std::string AlarmConfigFile;
std::string ProcessConfigFile;

View File

@ -106,20 +106,36 @@ void OamCache::checkReload()
#if !defined(SKIP_OAM_INIT)
{
try {
int state = oam::ACTIVE; bool degraded;
int state = oam::MAN_INIT;
bool degraded;
char num[80];
int retry = 0;
// MCOL-259 retry for 5 seconds if the PM is in some INIT mode.
while (( state == oam::BUSY_INIT
|| state == oam::MAN_INIT
|| state == oam::PID_UPDATE)
&& retry < 5)
{
snprintf(num, 80, "%d", *it);
try {
oam.getModuleStatus(string("pm") + num, state, degraded);
}
catch (...) {break;}
snprintf(num, 80, "%d", *it);
try {
oam.getModuleStatus(string("pm") + num, state, degraded);
}
catch (...) {}
if (state == oam::ACTIVE) {
pmToConnectionMap[*it] = i++;
moduleIds.push_back(*it);
if (state == oam::ACTIVE || state == oam::DEGRADED) {
pmToConnectionMap[*it] = i++;
moduleIds.push_back(*it);
break;
}
sleep(1);
//cout << "pm " << *it << " -> connection " << (i-1) << endl;
}
if (state != oam::ACTIVE)
{
ostringstream os;
os << "OamCache::checkReload shows state for pm" << num << " as " << state;
oam.writeLog(os.str(), logging::LOG_TYPE_WARNING);
}
}
catch (...) { /* doesn't get added to the connection map */ }
}

View File

@ -2573,7 +2573,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName,
DepProcessName, DepModuleName, LogFile);
//Update Process Status: Update PID
updateProcessInfo(processName, STATE_MAX, newProcessID);
updateProcessInfo(processName, PID_UPDATE, newProcessID);
}
log.writeLog(__LINE__, processName + " PID is " + oam.itoa(newProcessID), LOG_TYPE_DEBUG);