mirror of
https://github.com/MariaDB/server.git
synced 2025-09-02 09:41:40 +03:00
WL#2347 - Load independent heartbeats
Reset missed heartbeat count on receipt of signal from node. This fixes a bug where that under high network load, the heartbeat packets could be delayed, causing the appearance of node failure (due to lost heartbeats). ndb/include/kernel/NodeInfo.hpp: Add m_heartbeat_cnt to track missed heartbeats ndb/include/transporter/TransporterCallback.hpp: add prototype for transporter_recv_from() Called on receipt from a node. ndb/src/common/transporter/TransporterRegistry.cpp: Add calls to transporter_receive_from when data is received (before unpack) ndb/src/kernel/blocks/qmgr/Qmgr.hpp: remove NodeRec::alarmCount. missed heartbeat count now kept in NodeInfo ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: Use NodeInfo::m_heartbeat_cnt for missed heartbeat count ndb/src/kernel/vm/TransporterCallback.cpp: add transporter_recv_from(), which is called on receipt of signals. It resets missed heartbeat count for that node. ndb/src/ndbapi/ClusterMgr.cpp: Use NodeInfo::m_heartbeat_cnt for missed heartbeat count ndb/src/ndbapi/ClusterMgr.hpp: Use NodeInfo::m_heartbeat_cnt instead of ClusterMgr::Node::hbSent for missed heartbeat count. We now use the same storage for API and Kernel heartbeats. Add ClusterMgr::hb_received(nodeId) to reset hbSent (as if we received a heartbeat, but callable from elsewhere - e.g. when signal received) ndb/src/ndbapi/TransporterFacade.cpp: Implement transporter_recv_from for ndbapi - which resets hbSent ndb/src/ndbapi/TransporterFacade.hpp: Add hb_received(nodeId)
This commit is contained in:
@@ -41,6 +41,7 @@ public:
|
|||||||
Uint32 m_type; ///< Node type
|
Uint32 m_type; ///< Node type
|
||||||
Uint32 m_connectCount; ///< No of times connected
|
Uint32 m_connectCount; ///< No of times connected
|
||||||
bool m_connected; ///< Node is connected
|
bool m_connected; ///< Node is connected
|
||||||
|
Uint32 m_heartbeat_cnt; ///< Missed heartbeats
|
||||||
|
|
||||||
friend NdbOut & operator<<(NdbOut&, const NodeInfo&);
|
friend NdbOut & operator<<(NdbOut&, const NodeInfo&);
|
||||||
};
|
};
|
||||||
@@ -52,6 +53,7 @@ NodeInfo::NodeInfo(){
|
|||||||
m_signalVersion = 0;
|
m_signalVersion = 0;
|
||||||
m_type = INVALID;
|
m_type = INVALID;
|
||||||
m_connectCount = 0;
|
m_connectCount = 0;
|
||||||
|
m_heartbeat_cnt= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
|
@@ -341,5 +341,8 @@ enum TransporterError {
|
|||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
reportError(void * callbackObj, NodeId nodeId, TransporterError errorCode);
|
reportError(void * callbackObj, NodeId nodeId, TransporterError errorCode);
|
||||||
|
|
||||||
|
void
|
||||||
|
transporter_recv_from(void* callbackObj, NodeId node);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -918,6 +918,7 @@ TransporterRegistry::performReceive()
|
|||||||
NodeId remoteNodeId;
|
NodeId remoteNodeId;
|
||||||
Uint32 * readPtr;
|
Uint32 * readPtr;
|
||||||
Uint32 sz = theOSEReceiver->getReceiveData(&remoteNodeId, &readPtr);
|
Uint32 sz = theOSEReceiver->getReceiveData(&remoteNodeId, &readPtr);
|
||||||
|
transporter_recv_from(callbackObj, remoteNodeId);
|
||||||
Uint32 szUsed = unpack(readPtr,
|
Uint32 szUsed = unpack(readPtr,
|
||||||
sz,
|
sz,
|
||||||
remoteNodeId,
|
remoteNodeId,
|
||||||
@@ -953,6 +954,7 @@ TransporterRegistry::performReceive()
|
|||||||
{
|
{
|
||||||
Uint32 * ptr;
|
Uint32 * ptr;
|
||||||
Uint32 sz = t->getReceiveData(&ptr);
|
Uint32 sz = t->getReceiveData(&ptr);
|
||||||
|
transporter_recv_from(callbackObj, nodeId);
|
||||||
Uint32 szUsed = unpack(ptr, sz, nodeId, ioStates[nodeId]);
|
Uint32 szUsed = unpack(ptr, sz, nodeId, ioStates[nodeId]);
|
||||||
t->updateReceiveDataPtr(szUsed);
|
t->updateReceiveDataPtr(szUsed);
|
||||||
}
|
}
|
||||||
@@ -976,6 +978,7 @@ TransporterRegistry::performReceive()
|
|||||||
{
|
{
|
||||||
Uint32 * readPtr, * eodPtr;
|
Uint32 * readPtr, * eodPtr;
|
||||||
t->getReceivePtr(&readPtr, &eodPtr);
|
t->getReceivePtr(&readPtr, &eodPtr);
|
||||||
|
transporter_recv_from(callbackObj, nodeId);
|
||||||
Uint32 *newPtr = unpack(readPtr, eodPtr, nodeId, ioStates[nodeId]);
|
Uint32 *newPtr = unpack(readPtr, eodPtr, nodeId, ioStates[nodeId]);
|
||||||
t->updateReceivePtr(newPtr);
|
t->updateReceivePtr(newPtr);
|
||||||
}
|
}
|
||||||
@@ -993,6 +996,7 @@ TransporterRegistry::performReceive()
|
|||||||
{
|
{
|
||||||
Uint32 * readPtr, * eodPtr;
|
Uint32 * readPtr, * eodPtr;
|
||||||
t->getReceivePtr(&readPtr, &eodPtr);
|
t->getReceivePtr(&readPtr, &eodPtr);
|
||||||
|
transporter_recv_from(callbackObj, nodeId);
|
||||||
Uint32 *newPtr = unpack(readPtr, eodPtr, nodeId, ioStates[nodeId]);
|
Uint32 *newPtr = unpack(readPtr, eodPtr, nodeId, ioStates[nodeId]);
|
||||||
t->updateReceivePtr(newPtr);
|
t->updateReceivePtr(newPtr);
|
||||||
}
|
}
|
||||||
|
@@ -118,8 +118,7 @@ public:
|
|||||||
struct NodeRec {
|
struct NodeRec {
|
||||||
UintR ndynamicId;
|
UintR ndynamicId;
|
||||||
Phase phase;
|
Phase phase;
|
||||||
UintR alarmCount;
|
|
||||||
|
|
||||||
QmgrState sendPrepFailReqStatus;
|
QmgrState sendPrepFailReqStatus;
|
||||||
QmgrState sendCommitFailReqStatus;
|
QmgrState sendCommitFailReqStatus;
|
||||||
QmgrState sendPresToStatus;
|
QmgrState sendPresToStatus;
|
||||||
|
@@ -66,7 +66,7 @@ void Qmgr::execCM_HEARTBEAT(Signal* signal)
|
|||||||
jamEntry();
|
jamEntry();
|
||||||
hbNodePtr.i = signal->theData[0];
|
hbNodePtr.i = signal->theData[0];
|
||||||
ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
|
ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
|
||||||
hbNodePtr.p->alarmCount = 0;
|
setNodeInfo(hbNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
return;
|
return;
|
||||||
}//Qmgr::execCM_HEARTBEAT()
|
}//Qmgr::execCM_HEARTBEAT()
|
||||||
|
|
||||||
@@ -1040,7 +1040,7 @@ void Qmgr::execCM_ADD(Signal* signal)
|
|||||||
jam();
|
jam();
|
||||||
ndbrequire(addNodePtr.p->phase == ZSTARTING);
|
ndbrequire(addNodePtr.p->phase == ZSTARTING);
|
||||||
addNodePtr.p->phase = ZRUNNING;
|
addNodePtr.p->phase = ZRUNNING;
|
||||||
addNodePtr.p->alarmCount = 0;
|
setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
c_clusterNodes.set(addNodePtr.i);
|
c_clusterNodes.set(addNodePtr.i);
|
||||||
findNeighbours(signal);
|
findNeighbours(signal);
|
||||||
|
|
||||||
@@ -1078,7 +1078,7 @@ Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
|
|||||||
* NODES IN THE CLUSTER.
|
* NODES IN THE CLUSTER.
|
||||||
*/
|
*/
|
||||||
nodePtr.p->phase = ZRUNNING;
|
nodePtr.p->phase = ZRUNNING;
|
||||||
nodePtr.p->alarmCount = 0;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
||||||
findNeighbours(signal);
|
findNeighbours(signal);
|
||||||
c_clusterNodes.set(nodePtr.i);
|
c_clusterNodes.set(nodePtr.i);
|
||||||
c_start.reset();
|
c_start.reset();
|
||||||
@@ -1299,7 +1299,7 @@ void Qmgr::findNeighbours(Signal* signal)
|
|||||||
*---------------------------------------------------------------------*/
|
*---------------------------------------------------------------------*/
|
||||||
fnNodePtr.i = cneighbourl;
|
fnNodePtr.i = cneighbourl;
|
||||||
ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
|
ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
|
||||||
fnNodePtr.p->alarmCount = 0;
|
setNodeInfo(fnNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
}//if
|
}//if
|
||||||
}//if
|
}//if
|
||||||
|
|
||||||
@@ -1347,8 +1347,8 @@ void Qmgr::initData(Signal* signal)
|
|||||||
} else {
|
} else {
|
||||||
nodePtr.p->phase = ZAPI_INACTIVE;
|
nodePtr.p->phase = ZAPI_INACTIVE;
|
||||||
}
|
}
|
||||||
|
|
||||||
nodePtr.p->alarmCount = 0;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
||||||
nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
|
nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
|
||||||
nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
|
nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
|
||||||
nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
|
nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
|
||||||
@@ -1550,18 +1550,18 @@ void Qmgr::checkHeartbeat(Signal* signal)
|
|||||||
}//if
|
}//if
|
||||||
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
||||||
|
|
||||||
nodePtr.p->alarmCount ++;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
|
||||||
ndbrequire(nodePtr.p->phase == ZRUNNING);
|
ndbrequire(nodePtr.p->phase == ZRUNNING);
|
||||||
ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
|
ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
|
||||||
|
|
||||||
if(nodePtr.p->alarmCount > 2){
|
if(getNodeInfo(nodePtr.i).m_heartbeat_cnt > 2){
|
||||||
signal->theData[0] = NDB_LE_MissedHeartbeat;
|
signal->theData[0] = NDB_LE_MissedHeartbeat;
|
||||||
signal->theData[1] = nodePtr.i;
|
signal->theData[1] = nodePtr.i;
|
||||||
signal->theData[2] = nodePtr.p->alarmCount - 1;
|
signal->theData[2] = getNodeInfo(nodePtr.i).m_heartbeat_cnt - 1;
|
||||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nodePtr.p->alarmCount > 4) {
|
if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) {
|
||||||
jam();
|
jam();
|
||||||
/**----------------------------------------------------------------------
|
/**----------------------------------------------------------------------
|
||||||
* OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
|
* OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
|
||||||
@@ -1593,16 +1593,16 @@ void Qmgr::apiHbHandlingLab(Signal* signal)
|
|||||||
|
|
||||||
if (TnodePtr.p->phase == ZAPI_ACTIVE){
|
if (TnodePtr.p->phase == ZAPI_ACTIVE){
|
||||||
jam();
|
jam();
|
||||||
TnodePtr.p->alarmCount ++;
|
setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
|
||||||
|
|
||||||
if(TnodePtr.p->alarmCount > 2){
|
if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2){
|
||||||
signal->theData[0] = NDB_LE_MissedHeartbeat;
|
signal->theData[0] = NDB_LE_MissedHeartbeat;
|
||||||
signal->theData[1] = nodeId;
|
signal->theData[1] = nodeId;
|
||||||
signal->theData[2] = TnodePtr.p->alarmCount - 1;
|
signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
|
||||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (TnodePtr.p->alarmCount > 4) {
|
if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) {
|
||||||
jam();
|
jam();
|
||||||
/*------------------------------------------------------------------*/
|
/*------------------------------------------------------------------*/
|
||||||
/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
|
/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
|
||||||
@@ -1634,16 +1634,17 @@ void Qmgr::checkStartInterface(Signal* signal)
|
|||||||
ptrAss(nodePtr, nodeRec);
|
ptrAss(nodePtr, nodeRec);
|
||||||
if (nodePtr.p->phase == ZFAIL_CLOSING) {
|
if (nodePtr.p->phase == ZFAIL_CLOSING) {
|
||||||
jam();
|
jam();
|
||||||
nodePtr.p->alarmCount = nodePtr.p->alarmCount + 1;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
|
||||||
if (c_connectedNodes.get(nodePtr.i)){
|
if (c_connectedNodes.get(nodePtr.i)){
|
||||||
jam();
|
jam();
|
||||||
/*-------------------------------------------------------------------*/
|
/*-------------------------------------------------------------------*/
|
||||||
// We need to ensure that the connection is not restored until it has
|
// We need to ensure that the connection is not restored until it has
|
||||||
// been disconnected for at least three seconds.
|
// been disconnected for at least three seconds.
|
||||||
/*-------------------------------------------------------------------*/
|
/*-------------------------------------------------------------------*/
|
||||||
nodePtr.p->alarmCount = 0;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
||||||
}//if
|
}//if
|
||||||
if ((nodePtr.p->alarmCount > 3) && (nodePtr.p->failState == NORMAL)) {
|
if ((getNodeInfo(nodePtr.i).m_heartbeat_cnt > 3)
|
||||||
|
&& (nodePtr.p->failState == NORMAL)) {
|
||||||
/**------------------------------------------------------------------
|
/**------------------------------------------------------------------
|
||||||
* WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
|
* WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
|
||||||
* CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
|
* CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
|
||||||
@@ -1659,18 +1660,18 @@ void Qmgr::checkStartInterface(Signal* signal)
|
|||||||
nodePtr.p->phase = ZINIT;
|
nodePtr.p->phase = ZINIT;
|
||||||
}//if
|
}//if
|
||||||
|
|
||||||
nodePtr.p->alarmCount = 0;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
||||||
signal->theData[0] = 0;
|
signal->theData[0] = 0;
|
||||||
signal->theData[1] = nodePtr.i;
|
signal->theData[1] = nodePtr.i;
|
||||||
sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
|
sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
|
||||||
} else {
|
} else {
|
||||||
if(((nodePtr.p->alarmCount + 1) % 60) == 0){
|
if(((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 60) == 0){
|
||||||
char buf[100];
|
char buf[100];
|
||||||
BaseString::snprintf(buf, sizeof(buf),
|
BaseString::snprintf(buf, sizeof(buf),
|
||||||
"Failure handling of node %d has not completed in %d min."
|
"Failure handling of node %d has not completed in %d min."
|
||||||
" - state = %d",
|
" - state = %d",
|
||||||
nodePtr.i,
|
nodePtr.i,
|
||||||
(nodePtr.p->alarmCount + 1)/60,
|
(getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1)/60,
|
||||||
nodePtr.p->failState);
|
nodePtr.p->failState);
|
||||||
warningEvent(buf);
|
warningEvent(buf);
|
||||||
}
|
}
|
||||||
@@ -1718,7 +1719,7 @@ void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo)
|
|||||||
* WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW
|
* WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW
|
||||||
* SECONDS.
|
* SECONDS.
|
||||||
*-------------------------------------------------------------------------*/
|
*-------------------------------------------------------------------------*/
|
||||||
failedNodePtr.p->alarmCount = 0;
|
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
|
|
||||||
CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
|
CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
|
||||||
|
|
||||||
@@ -1871,7 +1872,7 @@ void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
|
|||||||
/*---------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------*/
|
||||||
failedNodePtr.p->failState = NORMAL;
|
failedNodePtr.p->failState = NORMAL;
|
||||||
failedNodePtr.p->phase = ZFAIL_CLOSING;
|
failedNodePtr.p->phase = ZFAIL_CLOSING;
|
||||||
failedNodePtr.p->alarmCount = 0;
|
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
|
|
||||||
CloseComReqConf * const closeCom =
|
CloseComReqConf * const closeCom =
|
||||||
(CloseComReqConf *)&signal->theData[0];
|
(CloseComReqConf *)&signal->theData[0];
|
||||||
@@ -1965,8 +1966,8 @@ void Qmgr::execAPI_REGREQ(Signal* signal)
|
|||||||
}
|
}
|
||||||
|
|
||||||
setNodeInfo(apiNodePtr.i).m_version = version;
|
setNodeInfo(apiNodePtr.i).m_version = version;
|
||||||
|
|
||||||
apiNodePtr.p->alarmCount = 0;
|
setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
|
|
||||||
ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
|
ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
|
||||||
apiRegConf->qmgrRef = reference();
|
apiRegConf->qmgrRef = reference();
|
||||||
@@ -2484,7 +2485,7 @@ void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
|
|||||||
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
||||||
nodePtr.p->phase = ZFAIL_CLOSING;
|
nodePtr.p->phase = ZFAIL_CLOSING;
|
||||||
nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
|
nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
|
||||||
nodePtr.p->alarmCount = 0;
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
||||||
c_clusterNodes.clear(nodePtr.i);
|
c_clusterNodes.clear(nodePtr.i);
|
||||||
}//for
|
}//for
|
||||||
/*----------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------*/
|
||||||
@@ -2742,7 +2743,7 @@ void Qmgr::failReport(Signal* signal,
|
|||||||
failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
|
failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
|
||||||
failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
|
failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
|
||||||
failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
|
failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
|
||||||
failedNodePtr.p->alarmCount = 0;
|
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
if (aSendFailRep == ZTRUE) {
|
if (aSendFailRep == ZTRUE) {
|
||||||
jam();
|
jam();
|
||||||
if (failedNodePtr.i != getOwnNodeId()) {
|
if (failedNodePtr.i != getOwnNodeId()) {
|
||||||
|
@@ -33,6 +33,7 @@
|
|||||||
#include <NdbOut.hpp>
|
#include <NdbOut.hpp>
|
||||||
#include "DataBuffer.hpp"
|
#include "DataBuffer.hpp"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The instance
|
* The instance
|
||||||
*/
|
*/
|
||||||
@@ -452,3 +453,8 @@ SignalLoggerManager::printSegmentedSection(FILE * output,
|
|||||||
putc('\n', output);
|
putc('\n', output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
transporter_recv_from(void * callbackObj, NodeId nodeId){
|
||||||
|
globalData.m_nodeInfo[nodeId].m_heartbeat_cnt= 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -214,7 +214,7 @@ ClusterMgr::threadMain( ){
|
|||||||
* It is now time to send a new Heartbeat
|
* It is now time to send a new Heartbeat
|
||||||
*/
|
*/
|
||||||
if (theNode.hbCounter >= theNode.hbFrequency) {
|
if (theNode.hbCounter >= theNode.hbFrequency) {
|
||||||
theNode.hbSent++;
|
theNode.m_info.m_heartbeat_cnt++;
|
||||||
theNode.hbCounter = 0;
|
theNode.hbCounter = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -231,7 +231,7 @@ ClusterMgr::threadMain( ){
|
|||||||
theFacade.sendSignalUnCond(&signal, nodeId);
|
theFacade.sendSignalUnCond(&signal, nodeId);
|
||||||
}//if
|
}//if
|
||||||
|
|
||||||
if (theNode.hbSent == 4 && theNode.hbFrequency > 0){
|
if (theNode.m_info.m_heartbeat_cnt == 4 && theNode.hbFrequency > 0){
|
||||||
reportNodeFailed(i);
|
reportNodeFailed(i);
|
||||||
}//if
|
}//if
|
||||||
}
|
}
|
||||||
@@ -337,7 +337,7 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
|
|||||||
node.compatible = ndbCompatible_api_ndb(NDB_VERSION,
|
node.compatible = ndbCompatible_api_ndb(NDB_VERSION,
|
||||||
node.m_info.m_version);
|
node.m_info.m_version);
|
||||||
}
|
}
|
||||||
|
|
||||||
node.m_state = apiRegConf->nodeState;
|
node.m_state = apiRegConf->nodeState;
|
||||||
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
|
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
|
||||||
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
|
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
|
||||||
@@ -345,7 +345,7 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
|
|||||||
} else {
|
} else {
|
||||||
set_node_alive(node, false);
|
set_node_alive(node, false);
|
||||||
}//if
|
}//if
|
||||||
node.hbSent = 0;
|
node.m_info.m_heartbeat_cnt = 0;
|
||||||
node.hbCounter = 0;
|
node.hbCounter = 0;
|
||||||
if (node.m_info.m_type != NodeInfo::REP) {
|
if (node.m_info.m_type != NodeInfo::REP) {
|
||||||
node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
|
node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
|
||||||
@@ -414,7 +414,7 @@ ClusterMgr::reportConnected(NodeId nodeId){
|
|||||||
|
|
||||||
Node & theNode = theNodes[nodeId];
|
Node & theNode = theNodes[nodeId];
|
||||||
theNode.connected = true;
|
theNode.connected = true;
|
||||||
theNode.hbSent = 0;
|
theNode.m_info.m_heartbeat_cnt = 0;
|
||||||
theNode.hbCounter = 0;
|
theNode.hbCounter = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -73,12 +73,12 @@ public:
|
|||||||
*/
|
*/
|
||||||
Uint32 hbFrequency; // Heartbeat frequence
|
Uint32 hbFrequency; // Heartbeat frequence
|
||||||
Uint32 hbCounter; // # milliseconds passed since last hb sent
|
Uint32 hbCounter; // # milliseconds passed since last hb sent
|
||||||
Uint32 hbSent; // # heartbeats sent (without answer)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const Node & getNodeInfo(NodeId) const;
|
const Node & getNodeInfo(NodeId) const;
|
||||||
Uint32 getNoOfConnectedNodes() const;
|
Uint32 getNoOfConnectedNodes() const;
|
||||||
|
void hb_received(NodeId);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Uint32 noOfAliveNodes;
|
Uint32 noOfAliveNodes;
|
||||||
Uint32 noOfConnectedNodes;
|
Uint32 noOfConnectedNodes;
|
||||||
@@ -128,6 +128,12 @@ ClusterMgr::getNoOfConnectedNodes() const {
|
|||||||
return noOfConnectedNodes;
|
return noOfConnectedNodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void
|
||||||
|
ClusterMgr::hb_received(NodeId nodeId) {
|
||||||
|
theNodes[nodeId].m_info.m_heartbeat_cnt= 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -126,6 +126,10 @@ reportDisconnect(void * callbackObj, NodeId nodeId, Uint32 error){
|
|||||||
//TransporterFacade::instance()->reportDisconnected(nodeId);
|
//TransporterFacade::instance()->reportDisconnected(nodeId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
transporter_recv_from(void * callbackObj, NodeId nodeId){
|
||||||
|
((TransporterFacade*)(callbackObj))->hb_received(nodeId);
|
||||||
|
}
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
*
|
*
|
||||||
|
@@ -114,6 +114,9 @@ public:
|
|||||||
|
|
||||||
TransporterRegistry* get_registry() { return theTransporterRegistry;};
|
TransporterRegistry* get_registry() { return theTransporterRegistry;};
|
||||||
|
|
||||||
|
// heart beat received from a node (e.g. a signal came)
|
||||||
|
void hb_received(NodeId n);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* Send a signal unconditional of node status (used by ClusterMgr)
|
* Send a signal unconditional of node status (used by ClusterMgr)
|
||||||
@@ -295,6 +298,12 @@ TransporterFacade::get_node_alive(NodeId n) const {
|
|||||||
return node.m_alive;
|
return node.m_alive;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void
|
||||||
|
TransporterFacade::hb_received(NodeId n) {
|
||||||
|
theClusterMgr->hb_received(n);
|
||||||
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
bool
|
bool
|
||||||
TransporterFacade::get_node_stopping(NodeId n) const {
|
TransporterFacade::get_node_stopping(NodeId n) const {
|
||||||
|
Reference in New Issue
Block a user