You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-259 add some retry logic to the OAMCache system. Add that degraded is still valid for a PM.
This commit is contained in:
@ -367,12 +367,13 @@ void pDictionaryScan::sendPrimitiveMessages()
|
||||
if (dbRootConnectionMap->find(dbroot) == dbRootConnectionMap->end())
|
||||
{
|
||||
// MCOL-259 force a reload of the xml. This usually fixes it.
|
||||
std::cout << "dictionary forcing reload of columnstore.xml for dbRootConnectionMap" << std::endl;
|
||||
Logger log;
|
||||
log.logMessage(logging::LOG_TYPE_DEBUG, "dictionary forcing reload of columnstore.xml for dbRootConnectionMap");
|
||||
oamCache->forceReload();
|
||||
dbRootConnectionMap = oamCache->getDBRootToConnectionMap();
|
||||
if (dbRootConnectionMap->find(dbroot) == dbRootConnectionMap->end())
|
||||
{
|
||||
std::cout << "dictionary still not in dbRootConnectionMap" << std::endl;
|
||||
log.logMessage(logging::LOG_TYPE_DEBUG, "dictionary still not in dbRootConnectionMap");
|
||||
throw IDBExcept(ERR_DATA_OFFLINE);
|
||||
}
|
||||
}
|
||||
|
@ -1623,12 +1623,13 @@ void TupleBPS::makeJobs(vector<Job> *jobs)
|
||||
if (dbRootConnectionMap->find(scannedExtents[i].dbRoot) == dbRootConnectionMap->end())
|
||||
{
|
||||
// MCOL-259 force a reload of the xml. This usually fixes it.
|
||||
std::cout << "forcing reload of columnstore.xml for dbRootConnectionMap" << std::endl;
|
||||
Logger log;
|
||||
log.logMessage(logging::LOG_TYPE_WARNING, "forcing reload of columnstore.xml for dbRootConnectionMap");
|
||||
oamCache->forceReload();
|
||||
dbRootConnectionMap = oamCache->getDBRootToConnectionMap();
|
||||
if (dbRootConnectionMap->find(scannedExtents[i].dbRoot) == dbRootConnectionMap->end())
|
||||
{
|
||||
std::cout << "still not in dbRootConnectionMap" << std::endl;
|
||||
log.logMessage(logging::LOG_TYPE_WARNING, "dbroot still not in dbRootConnectionMap");
|
||||
throw IDBExcept(ERR_DATA_OFFLINE);
|
||||
}
|
||||
}
|
||||
|
@ -1618,6 +1618,7 @@ namespace oam
|
||||
{}
|
||||
|
||||
// no match found
|
||||
state = oam::UNEQUIP;
|
||||
exceptionControl("getModuleStatus", API_INVALID_PARAMETER);
|
||||
}
|
||||
|
||||
|
@ -273,7 +273,8 @@ namespace oam
|
||||
STANDBY_INIT, // 18 = Standby init
|
||||
BUSY_INIT, // 19 = Busy init
|
||||
ROLLBACK_INIT, // 20 = Rollback during DML init
|
||||
STATE_MAX // 21 = Max value
|
||||
PID_UPDATE, // 21 = Assigning the pid
|
||||
STATE_MAX // 22 = Max value
|
||||
};
|
||||
|
||||
/** @brief String State
|
||||
@ -2390,17 +2391,17 @@ namespace oam
|
||||
*/
|
||||
EXPORT void dbrmctl(std::string command);
|
||||
|
||||
/** @brief Wait for system to close transactions
|
||||
*
|
||||
* When a Shutdown, stop, restart or suspend operation is
|
||||
* requested but there are active transactions of some sort,
|
||||
* We wait for all transactions to close before performing
|
||||
* the action.
|
||||
*/
|
||||
EXPORT bool waitForSystem(PROC_MGT_MSG_REQUEST request, messageqcpp::IOSocket& ios, messageqcpp::ByteStream& stillWorkingMsg);
|
||||
/** @brief Wait for system to close transactions
|
||||
*
|
||||
* When a Shutdown, stop, restart or suspend operation is
|
||||
* requested but there are active transactions of some sort,
|
||||
* We wait for all transactions to close before performing
|
||||
* the action.
|
||||
*/
|
||||
EXPORT bool waitForSystem(PROC_MGT_MSG_REQUEST request, messageqcpp::IOSocket& ios, messageqcpp::ByteStream& stillWorkingMsg);
|
||||
|
||||
void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false);
|
||||
void mountDBRoot(dbrootList dbrootConfigList, bool mount = true);
|
||||
void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false);
|
||||
void mountDBRoot(dbrootList dbrootConfigList, bool mount = true);
|
||||
|
||||
/**
|
||||
*@brief gluster control
|
||||
@ -2431,19 +2432,24 @@ namespace oam
|
||||
**/
|
||||
EXPORT bool disableMySQLRep();
|
||||
|
||||
/** @brief check Gluster Log after a Gluster control call
|
||||
*/
|
||||
EXPORT int checkGlusterLog(std::string logFile, std::string& errmsg);
|
||||
/** @brief check Gluster Log after a Gluster control call
|
||||
*/
|
||||
EXPORT int checkGlusterLog(std::string logFile, std::string& errmsg);
|
||||
|
||||
/** @brief check and get mysql user password
|
||||
*/
|
||||
EXPORT std::string getMySQLPassword(bool bypassConfig = false);
|
||||
/** @brief check and get mysql user password
|
||||
*/
|
||||
EXPORT std::string getMySQLPassword(bool bypassConfig = false);
|
||||
|
||||
/** @brief update fstab with dbroot mounts
|
||||
*/
|
||||
EXPORT std::string updateFstab(std::string device, std::string dbrootID);
|
||||
/** @brief update fstab with dbroot mounts
|
||||
*/
|
||||
EXPORT std::string updateFstab(std::string device, std::string dbrootID);
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Write the message to the log
|
||||
*/
|
||||
void writeLog(const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO);
|
||||
|
||||
private:
|
||||
|
||||
int sendMsgToProcMgr3(messageqcpp::ByteStream::byte requestType, snmpmanager::AlarmList& alarmlist, const std::string date);
|
||||
|
||||
@ -2476,11 +2482,6 @@ namespace oam
|
||||
*/
|
||||
void sendStatusUpdate(messageqcpp::ByteStream obs, messageqcpp::ByteStream::byte returnRequestType);
|
||||
|
||||
/**
|
||||
* @brief Write the message to the log
|
||||
*/
|
||||
void writeLog(const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO);
|
||||
|
||||
std::string CalpontConfigFile;
|
||||
std::string AlarmConfigFile;
|
||||
std::string ProcessConfigFile;
|
||||
|
@ -106,20 +106,36 @@ void OamCache::checkReload()
|
||||
#if !defined(SKIP_OAM_INIT)
|
||||
{
|
||||
try {
|
||||
int state = oam::ACTIVE; bool degraded;
|
||||
int state = oam::MAN_INIT;
|
||||
bool degraded;
|
||||
char num[80];
|
||||
int retry = 0;
|
||||
// MCOL-259 retry for 5 seconds if the PM is in some INIT mode.
|
||||
while (( state == oam::BUSY_INIT
|
||||
|| state == oam::MAN_INIT
|
||||
|| state == oam::PID_UPDATE)
|
||||
&& retry < 5)
|
||||
{
|
||||
snprintf(num, 80, "%d", *it);
|
||||
try {
|
||||
oam.getModuleStatus(string("pm") + num, state, degraded);
|
||||
}
|
||||
catch (...) {break;}
|
||||
|
||||
snprintf(num, 80, "%d", *it);
|
||||
try {
|
||||
oam.getModuleStatus(string("pm") + num, state, degraded);
|
||||
}
|
||||
catch (...) {}
|
||||
|
||||
if (state == oam::ACTIVE) {
|
||||
pmToConnectionMap[*it] = i++;
|
||||
moduleIds.push_back(*it);
|
||||
if (state == oam::ACTIVE || state == oam::DEGRADED) {
|
||||
pmToConnectionMap[*it] = i++;
|
||||
moduleIds.push_back(*it);
|
||||
break;
|
||||
}
|
||||
sleep(1);
|
||||
//cout << "pm " << *it << " -> connection " << (i-1) << endl;
|
||||
}
|
||||
if (state != oam::ACTIVE)
|
||||
{
|
||||
ostringstream os;
|
||||
os << "OamCache::checkReload shows state for pm" << num << " as " << state;
|
||||
oam.writeLog(os.str(), logging::LOG_TYPE_WARNING);
|
||||
}
|
||||
}
|
||||
catch (...) { /* doesn't get added to the connection map */ }
|
||||
}
|
||||
|
@ -2573,7 +2573,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName,
|
||||
DepProcessName, DepModuleName, LogFile);
|
||||
|
||||
//Update Process Status: Update PID
|
||||
updateProcessInfo(processName, STATE_MAX, newProcessID);
|
||||
updateProcessInfo(processName, PID_UPDATE, newProcessID);
|
||||
}
|
||||
|
||||
log.writeLog(__LINE__, processName + " PID is " + oam.itoa(newProcessID), LOG_TYPE_DEBUG);
|
||||
|
Reference in New Issue
Block a user