From 746fc2f45712814a89cefca9e2729047502ea736 Mon Sep 17 00:00:00 2001
From: unknown <stewart@mysql.com>
Date: Tue, 23 May 2006 18:16:26 +1000
Subject: [PATCH 1/8] BUG#13985 Cluster: ndb_mgm "status" command can return
 incorrect data node status

partial fix for this bug. more info on what the other half of the fix involves
is in the bug report.


ndb/src/mgmclient/CommandInterpreter.cpp:
  partial fix for bug13985

  hold a mutex around printing out events
  hold the mutex also around printing out put of 'status' commands.

  this means we don't get
  1 started
  2 started
  2 starting

  output.

  we'll instead get the event before/after the entire status output.
  Due to the nature of the event arriving before status being updated,
  we'll pretty much always see started AFTER starting.
---
 ndb/src/mgmclient/CommandInterpreter.cpp | 28 ++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp
index 14cd3fba42b..cb85cd8f3e6 100644
--- a/ndb/src/mgmclient/CommandInterpreter.cpp
+++ b/ndb/src/mgmclient/CommandInterpreter.cpp
@@ -172,8 +172,15 @@ private:
   bool rep_connected;
 #endif
   struct NdbThread* m_event_thread;
+  NdbMutex *m_print_mutex;
 };
 
+struct event_thread_param {
+  NdbMgmHandle *m;
+  NdbMutex **p;
+};
+
+NdbMutex* print_mutex;
 
 /*
  * Facade object for CommandInterpreter
@@ -409,6 +416,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
   m_connected= false;
   m_event_thread= 0;
   try_reconnect = 0;
+  m_print_mutex= NdbMutex_Create();
 #ifdef HAVE_GLOBAL_REPLICATION
   rep_host = NULL;
   m_repserver = NULL;
@@ -422,6 +430,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
 CommandInterpreter::~CommandInterpreter() 
 {
   disconnect();
+  NdbMutex_Destroy(m_print_mutex);
   ndb_mgm_destroy_handle(&m_mgmsrv);
   ndb_mgm_destroy_handle(&m_mgmsrv2);
 }
@@ -461,11 +470,13 @@ CommandInterpreter::printError()
 
 static int do_event_thread;
 static void*
-event_thread_run(void* m)
+event_thread_run(void* p)
 {
   DBUG_ENTER("event_thread_run");
 
-  NdbMgmHandle handle= *(NdbMgmHandle*)m;
+  struct event_thread_param param= *(struct event_thread_param*)p;
+  NdbMgmHandle handle= *(param.m);
+  NdbMutex* printmutex= *(param.p);
 
   int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP,
 		   1, NDB_MGM_EVENT_CATEGORY_STARTUP,
@@ -483,7 +494,11 @@ event_thread_run(void* m)
       {
 	const char ping_token[]= "<PING>";
 	if (memcmp(ping_token,tmp,sizeof(ping_token)-1))
-	  ndbout << tmp;
+	  if(tmp && strlen(tmp))
+          {
+            Guard g(printmutex);
+            ndbout << tmp;
+          }
       }
     } while(do_event_thread);
     NDB_CLOSE_SOCKET(fd);
@@ -516,8 +531,11 @@ CommandInterpreter::connect()
 	assert(m_event_thread == 0);
 	assert(do_event_thread == 0);
 	do_event_thread= 0;
+        struct event_thread_param p;
+        p.m= &m_mgmsrv2;
+        p.p= &m_print_mutex;
 	m_event_thread = NdbThread_Create(event_thread_run,
-					  (void**)&m_mgmsrv2,
+					  (void**)&p,
 					  32768,
 					  "CommandInterpreted_event_thread",
 					  NDB_THREAD_PRIO_LOW);
@@ -607,6 +625,7 @@ CommandInterpreter::execute(const char *_line, int _try_reconnect,
   int result= execute_impl(_line);
   if (error)
     *error= m_error;
+
   return result;
 }
 
@@ -920,6 +939,7 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun,
     ndbout_c("Trying to start all nodes of system.");
     ndbout_c("Use ALL STATUS to see the system start-up phases.");
   } else {
+    Guard g(m_print_mutex);
     struct ndb_mgm_cluster_state *cl= ndb_mgm_get_status(m_mgmsrv);
     if(cl == 0){
       ndbout_c("Unable get status from management server");

From 3cea3705fe3bc40ad099057905d5932227638311 Mon Sep 17 00:00:00 2001
From: unknown <stewart@mysql.com>
Date: Mon, 3 Jul 2006 15:37:57 +1000
Subject: [PATCH 2/8] BUG#13985 ndb_mgm "status" command can return incorrect
 data node status

Second half of the fix for this bug.

This patch forces a heartbeat to be sent and will wait (a little while)
for replies. This way we can get

> all status
X starting
Y started
X started
>

which is okay as the new status comes after the old status, always.
There is the slimmest of opportunities to get output like above where only half
the cluster appears started.

This is about the best we can do with a command line interactive program.


ndb/src/mgmsrv/MgmtSrvr.cpp:
  Add updateStatus method to MgmtSrvr.

  Used to force an update of node status for the nodes.
ndb/src/mgmsrv/MgmtSrvr.hpp:
  add prototype for updateStatus(NodeBitmask) method
ndb/src/mgmsrv/Services.cpp:
  When status is queried, force an update of the status in the mgm server. (i.e. send heartbeats)
ndb/src/ndbapi/ClusterMgr.cpp:
  new DEBUG_REG define for debugging registration and HB code.

  Add ClusterMgr::forceHB(NodeBitmask) which sends a HB signal to each node in
  the bitmask and then waits for a REGCONF from them.
  Will only wait for a total of 1 second, not blocking an end client for too long.

  On receipt of HB, clear the nodeId in the waiting for bitmask and signal any
  waiting threads.
ndb/src/ndbapi/ClusterMgr.hpp:
  Add ::forceHB(NodeBitmask) and associated variables
---
 ndb/src/mgmsrv/MgmtSrvr.cpp   |  6 +++
 ndb/src/mgmsrv/MgmtSrvr.hpp   |  2 +
 ndb/src/mgmsrv/Services.cpp   |  3 ++
 ndb/src/ndbapi/ClusterMgr.cpp | 71 +++++++++++++++++++++++++++++++++--
 ndb/src/ndbapi/ClusterMgr.hpp | 10 ++++-
 5 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp
index b713a3908ab..f3e75f1afe4 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.cpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.cpp
@@ -1412,6 +1412,12 @@ MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
 
 #include <ClusterMgr.hpp>
 
+void
+MgmtSrvr::updateStatus(NodeBitmask nodes)
+{
+  theFacade->theClusterMgr->forceHB(nodes);
+}
+
 int 
 MgmtSrvr::status(int nodeId, 
                  ndb_mgm_node_status * _status, 
diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp
index 007494a277d..1e59f32b76f 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.hpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.hpp
@@ -487,6 +487,8 @@ public:
   void get_connected_nodes(NodeBitmask &connected_nodes) const;
   SocketServer *get_socket_server() { return m_socket_server; }
 
+  void updateStatus(NodeBitmask nodes);
+
   //**************************************************************************
 private:
   //**************************************************************************
diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp
index d28e0eba5b3..1df7d9be7b4 100644
--- a/ndb/src/mgmsrv/Services.cpp
+++ b/ndb/src/mgmsrv/Services.cpp
@@ -951,6 +951,9 @@ printNodeStatus(OutputStream *output,
 		MgmtSrvr &mgmsrv,
 		enum ndb_mgm_node_type type) {
   NodeId nodeId = 0;
+  NodeBitmask hbnodes;
+  mgmsrv.get_connected_nodes(hbnodes);
+  mgmsrv.updateStatus(hbnodes);
   while(mgmsrv.getNextNodeId(&nodeId, type)) {
     enum ndb_mgm_node_status status;
     Uint32 startPhase = 0, 
diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index fbff57d3168..120ab76f7ca 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -39,6 +39,8 @@
 
 int global_flag_send_heartbeat_now= 0;
 
+//#define DEBUG_REG
+
 // Just a C wrapper for threadMain
 extern "C" 
 void*
@@ -67,6 +69,8 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
   DBUG_ENTER("ClusterMgr::ClusterMgr");
   ndbSetOwnVersion();
   clusterMgrThreadMutex = NdbMutex_Create();
+  waitForHBMutex= NdbMutex_Create();
+  waitForHBCond= NdbCondition_Create();
   noOfAliveNodes= 0;
   noOfConnectedNodes= 0;
   theClusterMgrThread= 0;
@@ -77,7 +81,9 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
 ClusterMgr::~ClusterMgr()
 {
   DBUG_ENTER("ClusterMgr::~ClusterMgr");
-  doStop();  
+  doStop();
+  NdbCondition_Destroy(waitForHBCond);
+  NdbMutex_Destroy(waitForHBMutex);
   NdbMutex_Destroy(clusterMgrThreadMutex);
   DBUG_VOID_RETURN;
 }
@@ -163,6 +169,49 @@ ClusterMgr::doStop( ){
   DBUG_VOID_RETURN;
 }
 
+void
+ClusterMgr::forceHB(NodeBitmask waitFor)
+{
+    theFacade.lock_mutex();
+    global_flag_send_heartbeat_now= 1;
+
+    waitForHBFromNodes= waitFor;
+#ifdef DEBUG_REG
+    char buf[128];
+    ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
+#endif
+    NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
+
+    signal.theVerId_signalNumber   = GSN_API_REGREQ;
+    signal.theReceiversBlockNumber = QMGR;
+    signal.theTrace                = 0;
+    signal.theLength               = ApiRegReq::SignalLength;
+
+    ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend());
+    req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId());
+    req->version = NDB_VERSION;
+
+    int nodeId= 0;
+    for(int i=0;
+        NodeBitmask::NotFound!=(nodeId= waitForHBFromNodes.find(i));
+        i= nodeId+1)
+    {
+#ifdef DEBUG_REG
+      ndbout << "FORCE HB to " << nodeId << endl;
+#endif
+      theFacade.sendSignalUnCond(&signal, nodeId);
+    }
+
+    theFacade.unlock_mutex();
+
+    NdbMutex_Lock(waitForHBMutex);
+    NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000);
+    NdbMutex_Unlock(waitForHBMutex);
+#ifdef DEBUG_REG
+    ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
+#endif
+}
+
 void
 ClusterMgr::threadMain( ){
   NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
@@ -226,7 +275,7 @@ ClusterMgr::threadMain( ){
 	if (theNode.m_info.m_type == NodeInfo::REP) {
 	  signal.theReceiversBlockNumber = API_CLUSTERMGR;
 	}
-#if 0 
+#ifdef DEBUG_REG
 	ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId);
 #endif
 	theFacade.sendSignalUnCond(&signal, nodeId);
@@ -278,7 +327,7 @@ ClusterMgr::execAPI_REGREQ(const Uint32 * theData){
   const ApiRegReq * const apiRegReq = (ApiRegReq *)&theData[0];
   const NodeId nodeId = refToNode(apiRegReq->ref);
 
-#if 0
+#ifdef DEBUG_REG
   ndbout_c("ClusterMgr: Recd API_REGREQ from node %d", nodeId);
 #endif
 
@@ -319,7 +368,7 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
   const ApiRegConf * const apiRegConf = (ApiRegConf *)&theData[0];
   const NodeId nodeId = refToNode(apiRegConf->qmgrRef);
   
-#if 0 
+#ifdef DEBUG_REG
   ndbout_c("ClusterMgr: Recd API_REGCONF from node %d", nodeId);
 #endif
 
@@ -351,6 +400,13 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
   if (node.m_info.m_type != NodeInfo::REP) {
     node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
   }
+  waitForHBFromNodes.clear(nodeId);
+  if(waitForHBFromNodes.isclear())
+  {
+    NdbMutex_Lock(waitForHBMutex);
+    NdbCondition_Signal(waitForHBCond);
+    NdbMutex_Unlock(waitForHBMutex);
+  }
 }
 
 void
@@ -379,6 +435,13 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
   default:
     break;
   }
+  waitForHBFromNodes.clear(nodeId);
+  if(waitForHBFromNodes.isclear())
+  {
+    NdbMutex_Lock(waitForHBMutex);
+    NdbCondition_Signal(waitForHBCond);
+    NdbMutex_Unlock(waitForHBMutex);
+  }
 }
 
 void
diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp
index 1a1e622a889..6eda98c4773 100644
--- a/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/ndb/src/ndbapi/ClusterMgr.hpp
@@ -49,7 +49,9 @@ public:
 
   void doStop();
   void startThread();
-  
+
+  void forceHB(NodeBitmask waitFor);
+
 private:
   void threadMain();
   
@@ -85,7 +87,11 @@ private:
   Uint32        noOfConnectedNodes;
   Node          theNodes[MAX_NODES];
   NdbThread*    theClusterMgrThread;
-  
+
+  NodeBitmask   waitForHBFromNodes; // used in forcing HBs
+  NdbMutex*     waitForHBMutex;
+  NdbCondition* waitForHBCond;
+
   /**
    * Used for controlling start/stop of the thread
    */

From bd015e57dbbb8429eb40d59a875610ee5c38ae93 Mon Sep 17 00:00:00 2001
From: unknown <stewart@willster.(none)>
Date: Fri, 7 Jul 2006 17:39:11 +1000
Subject: [PATCH 3/8] BUG#13985 ndb_mgm "status" command can return incorrect
 data node status

better support parallel show commands, hold mutex when touching waitForHBFromNodes


ndb/src/ndbapi/ClusterMgr.cpp:
  correctly serialize ::forceHB calls and hold mutex for whole time updating
  waitForHBFromNodes
---
 ndb/src/ndbapi/ClusterMgr.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index 120ab76f7ca..db169304acd 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -172,6 +172,7 @@ ClusterMgr::doStop( ){
 void
 ClusterMgr::forceHB(NodeBitmask waitFor)
 {
+    NdbMutex_Lock(waitForHBMutex);
     theFacade.lock_mutex();
     global_flag_send_heartbeat_now= 1;
 
@@ -204,7 +205,6 @@ ClusterMgr::forceHB(NodeBitmask waitFor)
 
     theFacade.unlock_mutex();
 
-    NdbMutex_Lock(waitForHBMutex);
     NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000);
     NdbMutex_Unlock(waitForHBMutex);
 #ifdef DEBUG_REG
@@ -400,13 +400,14 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
   if (node.m_info.m_type != NodeInfo::REP) {
     node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
   }
+
+  NdbMutex_Lock(waitForHBMutex);
   waitForHBFromNodes.clear(nodeId);
+
   if(waitForHBFromNodes.isclear())
-  {
-    NdbMutex_Lock(waitForHBMutex);
     NdbCondition_Signal(waitForHBCond);
-    NdbMutex_Unlock(waitForHBMutex);
-  }
+
+  NdbMutex_Unlock(waitForHBMutex);
 }
 
 void

From 5a8919f290ab569e323d623b4da0714ed2d361da Mon Sep 17 00:00:00 2001
From: unknown <stewart@willster.(none)>
Date: Fri, 7 Jul 2006 18:39:38 +1000
Subject: [PATCH 4/8] BUG#13985 Cluster: ndb_mgm "status" command can return
 incorrect data node status

use existing transporter mutex


ndb/src/ndbapi/ClusterMgr.cpp:
  just use the transporter facade mutex.

  parallel 'show' will get woken up at the same time
ndb/src/ndbapi/ClusterMgr.hpp:
  remove wait for heartbeat mutex
---
 ndb/src/ndbapi/ClusterMgr.cpp | 25 +++++++++++--------------
 ndb/src/ndbapi/ClusterMgr.hpp |  1 -
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index db169304acd..772cbaf99b5 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -69,7 +69,6 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
   DBUG_ENTER("ClusterMgr::ClusterMgr");
   ndbSetOwnVersion();
   clusterMgrThreadMutex = NdbMutex_Create();
-  waitForHBMutex= NdbMutex_Create();
   waitForHBCond= NdbCondition_Create();
   noOfAliveNodes= 0;
   noOfConnectedNodes= 0;
@@ -83,7 +82,6 @@ ClusterMgr::~ClusterMgr()
   DBUG_ENTER("ClusterMgr::~ClusterMgr");
   doStop();
   NdbCondition_Destroy(waitForHBCond);
-  NdbMutex_Destroy(waitForHBMutex);
   NdbMutex_Destroy(clusterMgrThreadMutex);
   DBUG_VOID_RETURN;
 }
@@ -172,8 +170,15 @@ ClusterMgr::doStop( ){
 void
 ClusterMgr::forceHB(NodeBitmask waitFor)
 {
-    NdbMutex_Lock(waitForHBMutex);
     theFacade.lock_mutex();
+
+    if(!waitForHBFromNodes.isclear())
+    {
+      NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000);
+      theFacade.unlock_mutex();
+      return;
+    }
+
     global_flag_send_heartbeat_now= 1;
 
     waitForHBFromNodes= waitFor;
@@ -203,10 +208,8 @@ ClusterMgr::forceHB(NodeBitmask waitFor)
       theFacade.sendSignalUnCond(&signal, nodeId);
     }
 
+    NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000);
     theFacade.unlock_mutex();
-
-    NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000);
-    NdbMutex_Unlock(waitForHBMutex);
 #ifdef DEBUG_REG
     ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
 #endif
@@ -401,13 +404,10 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
     node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
   }
 
-  NdbMutex_Lock(waitForHBMutex);
   waitForHBFromNodes.clear(nodeId);
 
   if(waitForHBFromNodes.isclear())
-    NdbCondition_Signal(waitForHBCond);
-
-  NdbMutex_Unlock(waitForHBMutex);
+    NdbCondition_Broadcast(waitForHBCond);
 }
 
 void
@@ -436,13 +436,10 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
   default:
     break;
   }
+
   waitForHBFromNodes.clear(nodeId);
   if(waitForHBFromNodes.isclear())
-  {
-    NdbMutex_Lock(waitForHBMutex);
     NdbCondition_Signal(waitForHBCond);
-    NdbMutex_Unlock(waitForHBMutex);
-  }
 }
 
 void
diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp
index 6eda98c4773..3284fd8aa8a 100644
--- a/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/ndb/src/ndbapi/ClusterMgr.hpp
@@ -89,7 +89,6 @@ private:
   NdbThread*    theClusterMgrThread;
 
   NodeBitmask   waitForHBFromNodes; // used in forcing HBs
-  NdbMutex*     waitForHBMutex;
   NdbCondition* waitForHBCond;
 
   /**

From a610467f71781644f9cb0461765033e9ea3d3b66 Mon Sep 17 00:00:00 2001
From: unknown <stewart@willster.(none)>
Date: Fri, 7 Jul 2006 20:10:15 +1000
Subject: [PATCH 5/8] BUG#13985: Cluster: ndb_mgm "status" command can return
 incorrect data node status

- only force HB to data nodes
- flag for if we broadcast condition on receipt of HB


ndb/src/mgmsrv/MgmtSrvr.cpp:
  Add get_connected_ndb_nodes to check status for connected data nodes only
ndb/src/mgmsrv/MgmtSrvr.hpp:
  add prototype for get_connected_ndb_nodes
ndb/src/mgmsrv/Services.cpp:
  only force HB to NDBD nodes
ndb/src/ndbapi/ClusterMgr.cpp:
  flag to control if we send the condition
ndb/src/ndbapi/ClusterMgr.hpp:
  flag for if we broadcast condition on receipt of hb
---
 ndb/src/mgmsrv/MgmtSrvr.cpp   | 19 +++++++++++++++++++
 ndb/src/mgmsrv/MgmtSrvr.hpp   |  1 +
 ndb/src/mgmsrv/Services.cpp   |  2 +-
 ndb/src/ndbapi/ClusterMgr.cpp | 19 ++++++++++++++-----
 ndb/src/ndbapi/ClusterMgr.hpp |  1 +
 5 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp
index f3e75f1afe4..793192dd487 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.cpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.cpp
@@ -1941,6 +1941,25 @@ MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const
   }
 }
 
+void
+MgmtSrvr::get_connected_ndb_nodes(NodeBitmask &connected_nodes) const
+{
+  NodeBitmask ndb_nodes;
+  if (theFacade && theFacade->theClusterMgr)
+  {
+    for(Uint32 i = 0; i < MAX_NODES; i++)
+    {
+      if (getNodeType(i) == NDB_MGM_NODE_TYPE_NDB)
+      {
+        ndb_nodes.set(i);
+	const ClusterMgr::Node &node= theFacade->theClusterMgr->getNodeInfo(i);
+	connected_nodes.bitOR(node.m_state.m_connected_nodes);
+      }
+    }
+  }
+  connected_nodes.bitAND(ndb_nodes);
+}
+
 bool
 MgmtSrvr::alloc_node_id(NodeId * nodeId, 
 			enum ndb_mgm_node_type type,
diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp
index 1e59f32b76f..5bacf640a18 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.hpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.hpp
@@ -485,6 +485,7 @@ public:
 
   const char *get_connect_address(Uint32 node_id);
   void get_connected_nodes(NodeBitmask &connected_nodes) const;
+  void get_connected_ndb_nodes(NodeBitmask &connected_nodes) const;
   SocketServer *get_socket_server() { return m_socket_server; }
 
   void updateStatus(NodeBitmask nodes);
diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp
index 1df7d9be7b4..ae3433fe95b 100644
--- a/ndb/src/mgmsrv/Services.cpp
+++ b/ndb/src/mgmsrv/Services.cpp
@@ -952,7 +952,7 @@ printNodeStatus(OutputStream *output,
 		enum ndb_mgm_node_type type) {
   NodeId nodeId = 0;
   NodeBitmask hbnodes;
-  mgmsrv.get_connected_nodes(hbnodes);
+  mgmsrv.get_connected_ndb_nodes(hbnodes);
   mgmsrv.updateStatus(hbnodes);
   while(mgmsrv.getNextNodeId(&nodeId, type)) {
     enum ndb_mgm_node_status status;
diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index 772cbaf99b5..28f65eebde8 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -70,6 +70,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
   ndbSetOwnVersion();
   clusterMgrThreadMutex = NdbMutex_Create();
   waitForHBCond= NdbCondition_Create();
+  waitingForHB= false;
   noOfAliveNodes= 0;
   noOfConnectedNodes= 0;
   theClusterMgrThread= 0;
@@ -172,7 +173,7 @@ ClusterMgr::forceHB(NodeBitmask waitFor)
 {
     theFacade.lock_mutex();
 
-    if(!waitForHBFromNodes.isclear())
+    if(waitingForHB)
     {
       NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000);
       theFacade.unlock_mutex();
@@ -180,6 +181,7 @@ ClusterMgr::forceHB(NodeBitmask waitFor)
     }
 
     global_flag_send_heartbeat_now= 1;
+    waitingForHB= true;
 
     waitForHBFromNodes= waitFor;
 #ifdef DEBUG_REG
@@ -209,10 +211,11 @@ ClusterMgr::forceHB(NodeBitmask waitFor)
     }
 
     NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000);
-    theFacade.unlock_mutex();
+    waitingForHB= false;
 #ifdef DEBUG_REG
     ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
 #endif
+    theFacade.unlock_mutex();
 }
 
 void
@@ -404,10 +407,16 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
     node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
   }
 
-  waitForHBFromNodes.clear(nodeId);
+  if(waitingForHB)
+  {
+    waitForHBFromNodes.clear(nodeId);
 
-  if(waitForHBFromNodes.isclear())
-    NdbCondition_Broadcast(waitForHBCond);
+    if(waitForHBFromNodes.isclear())
+    {
+      waitingForHB= false;
+      NdbCondition_Broadcast(waitForHBCond);
+    }
+  }
 }
 
 void
diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp
index 3284fd8aa8a..b9863821b4f 100644
--- a/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/ndb/src/ndbapi/ClusterMgr.hpp
@@ -90,6 +90,7 @@ private:
 
   NodeBitmask   waitForHBFromNodes; // used in forcing HBs
   NdbCondition* waitForHBCond;
+  bool          waitingForHB;
 
   /**
    * Used for controlling start/stop of the thread

From 7fcb36e2afffeb63aa1be9a476a4c781413eeb70 Mon Sep 17 00:00:00 2001
From: unknown <stewart@willster.(none)>
Date: Wed, 9 Aug 2006 15:03:55 +0800
Subject: [PATCH 6/8] BUG#13985

fixups after review by jonas


ndb/src/mgmclient/CommandInterpreter.cpp:
  Guard the print mutex when running SHOW
ndb/src/mgmsrv/MgmtSrvr.cpp:
  replace global_flag_send_heartbeat_now with forceHB()/updateStatus()

  don't use bitmask as parameter to forceHB to reflect reality of what the
  function does.

  remove get_connected_ndb_nodes() as it is no longer used
ndb/src/mgmsrv/MgmtSrvr.hpp:
  remove unused get_connected_ndb_nodes()

  update updateStatus prototype
ndb/src/mgmsrv/Services.cpp:
  use new prototype for updateStatus() - doesn't accept NodeBitmask
ndb/src/ndbapi/ClusterMgr.cpp:
  remove global_flag_send_heartbeat_now, replace with forceHB.

  compute bitmask of nodes to send HB to in forceHB
ndb/src/ndbapi/ClusterMgr.hpp:
  update prototype for forceHB, don't give the illusion that NodeBitmask means much.
---
 ndb/src/mgmclient/CommandInterpreter.cpp |  1 +
 ndb/src/mgmsrv/MgmtSrvr.cpp              | 28 ++++-------------------
 ndb/src/mgmsrv/MgmtSrvr.hpp              |  3 +--
 ndb/src/mgmsrv/Services.cpp              |  4 +---
 ndb/src/ndbapi/ClusterMgr.cpp            | 29 ++++++++++++++++--------
 ndb/src/ndbapi/ClusterMgr.hpp            |  2 +-
 6 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp
index 103c252ca04..d4f66a56da9 100644
--- a/ndb/src/mgmclient/CommandInterpreter.cpp
+++ b/ndb/src/mgmclient/CommandInterpreter.cpp
@@ -705,6 +705,7 @@ CommandInterpreter::execute_impl(const char *_line)
     DBUG_RETURN(true);
 
   if (strcasecmp(firstToken, "SHOW") == 0) {
+    Guard g(m_print_mutex);
     executeShow(allAfterFirstToken);
     DBUG_RETURN(true);
   }
diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp
index d514f0da1a4..5fabb84adb7 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.cpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.cpp
@@ -77,7 +77,6 @@
     }\
   }
 
-extern int global_flag_send_heartbeat_now;
 extern int g_no_nodeid_checks;
 extern my_bool opt_core;
 
@@ -1456,9 +1455,9 @@ MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
 #include <ClusterMgr.hpp>
 
 void
-MgmtSrvr::updateStatus(NodeBitmask nodes)
+MgmtSrvr::updateStatus()
 {
-  theFacade->theClusterMgr->forceHB(nodes);
+  theFacade->theClusterMgr->forceHB();
 }
 
 int 
@@ -1985,25 +1984,6 @@ MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const
   }
 }
 
-void
-MgmtSrvr::get_connected_ndb_nodes(NodeBitmask &connected_nodes) const
-{
-  NodeBitmask ndb_nodes;
-  if (theFacade && theFacade->theClusterMgr)
-  {
-    for(Uint32 i = 0; i < MAX_NODES; i++)
-    {
-      if (getNodeType(i) == NDB_MGM_NODE_TYPE_NDB)
-      {
-        ndb_nodes.set(i);
-	const ClusterMgr::Node &node= theFacade->theClusterMgr->getNodeInfo(i);
-	connected_nodes.bitOR(node.m_state.m_connected_nodes);
-      }
-    }
-  }
-  connected_nodes.bitAND(ndb_nodes);
-}
-
 bool
 MgmtSrvr::alloc_node_id(NodeId * nodeId, 
 			enum ndb_mgm_node_type type,
@@ -2178,7 +2158,7 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
   if (found_matching_type && !found_free_node) {
     // we have a temporary error which might be due to that 
     // we have got the latest connect status from db-nodes.  Force update.
-    global_flag_send_heartbeat_now= 1;
+    updateStatus();
   }
 
   BaseString type_string, type_c_string;
@@ -2532,7 +2512,7 @@ MgmtSrvr::Allocated_resources::~Allocated_resources()
   if (!m_reserved_nodes.isclear()) {
     m_mgmsrv.m_reserved_nodes.bitANDC(m_reserved_nodes); 
     // node has been reserved, force update signal to ndb nodes
-    global_flag_send_heartbeat_now= 1;
+    m_mgmsrv.updateStatus();
 
     char tmp_str[128];
     m_mgmsrv.m_reserved_nodes.getText(tmp_str);
diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp
index ab71fe6f4dc..17debb19f50 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.hpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.hpp
@@ -488,10 +488,9 @@ public:
 
   const char *get_connect_address(Uint32 node_id);
   void get_connected_nodes(NodeBitmask &connected_nodes) const;
-  void get_connected_ndb_nodes(NodeBitmask &connected_nodes) const;
   SocketServer *get_socket_server() { return m_socket_server; }
 
-  void updateStatus(NodeBitmask nodes);
+  void updateStatus();
 
   //**************************************************************************
 private:
diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp
index 653f36ecc6d..7f5b0e29442 100644
--- a/ndb/src/mgmsrv/Services.cpp
+++ b/ndb/src/mgmsrv/Services.cpp
@@ -982,9 +982,7 @@ printNodeStatus(OutputStream *output,
 		MgmtSrvr &mgmsrv,
 		enum ndb_mgm_node_type type) {
   NodeId nodeId = 0;
-  NodeBitmask hbnodes;
-  mgmsrv.get_connected_ndb_nodes(hbnodes);
-  mgmsrv.updateStatus(hbnodes);
+  mgmsrv.updateStatus();
   while(mgmsrv.getNextNodeId(&nodeId, type)) {
     enum ndb_mgm_node_status status;
     Uint32 startPhase = 0, 
diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index 28f65eebde8..4b3c409e9d4 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -37,8 +37,6 @@
 #include <mgmapi_configuration.hpp>
 #include <mgmapi_config_parameters.h>
 
-int global_flag_send_heartbeat_now= 0;
-
 //#define DEBUG_REG
 
 // Just a C wrapper for threadMain
@@ -169,7 +167,7 @@ ClusterMgr::doStop( ){
 }
 
 void
-ClusterMgr::forceHB(NodeBitmask waitFor)
+ClusterMgr::forceHB()
 {
     theFacade.lock_mutex();
 
@@ -180,10 +178,25 @@ ClusterMgr::forceHB(NodeBitmask waitFor)
       return;
     }
 
-    global_flag_send_heartbeat_now= 1;
     waitingForHB= true;
 
-    waitForHBFromNodes= waitFor;
+    NodeBitmask ndb_nodes;
+    ndb_nodes.clear();
+    waitForHBFromNodes.clear();
+    for(Uint32 i = 0; i < MAX_NODES; i++)
+    {
+      if(!theNodes[i].defined)
+        continue;
+      if(theNodes[i].m_info.m_type == NodeInfo::DB)
+      {
+        ndb_nodes.set(i);
+        const ClusterMgr::Node &node= getNodeInfo(i);
+        waitForHBFromNodes.bitOR(node.m_state.m_connected_nodes);
+      }
+      ndbout << endl;
+    }
+    waitForHBFromNodes.bitAND(ndb_nodes);
+
 #ifdef DEBUG_REG
     char buf[128];
     ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
@@ -239,9 +252,6 @@ ClusterMgr::threadMain( ){
     /**
      * Start of Secure area for use of Transporter
      */
-    int send_heartbeat_now= global_flag_send_heartbeat_now;
-    global_flag_send_heartbeat_now= 0;
-
     theFacade.lock_mutex();
     for (int i = 1; i < MAX_NODES; i++){
       /**
@@ -264,8 +274,7 @@ ClusterMgr::threadMain( ){
       }
       
       theNode.hbCounter += timeSlept;
-      if (theNode.hbCounter >= theNode.hbFrequency ||
-	  send_heartbeat_now) {
+      if (theNode.hbCounter >= theNode.hbFrequency) {
 	/**
 	 * It is now time to send a new Heartbeat
 	 */
diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp
index b9863821b4f..d2bcc52f7e8 100644
--- a/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/ndb/src/ndbapi/ClusterMgr.hpp
@@ -50,7 +50,7 @@ public:
   void doStop();
   void startThread();
 
-  void forceHB(NodeBitmask waitFor);
+  void forceHB();
 
 private:
   void threadMain();

From 1bd553d41becd0df5fc9935af196ff500f1fdb25 Mon Sep 17 00:00:00 2001
From: unknown <stewart@willster.(none)>
Date: Wed, 9 Aug 2006 15:39:32 +0800
Subject: [PATCH 7/8] remove undeeded printing of newline.

ndb/src/ndbapi/ClusterMgr.cpp:
  remove extra 'ndbout << endl'
---
 ndb/src/ndbapi/ClusterMgr.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index 4b3c409e9d4..475561af225 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -193,7 +193,6 @@ ClusterMgr::forceHB()
         const ClusterMgr::Node &node= getNodeInfo(i);
         waitForHBFromNodes.bitOR(node.m_state.m_connected_nodes);
       }
-      ndbout << endl;
     }
     waitForHBFromNodes.bitAND(ndb_nodes);
 

From 5cafb623b7f57031805ce399ab1bafdd8c861c4b Mon Sep 17 00:00:00 2001
From: unknown <stewart@willster.(none)>
Date: Tue, 15 Aug 2006 11:09:38 +0800
Subject: [PATCH 8/8] BUG#20823 testBackup FailMaster failing

few cases not handled properly (NF occurs).


ndb/src/kernel/blocks/backup/Backup.cpp:
  Don't write fragment info if we haven't retreived any fragment info yet
  (FailMaster test 2)

  Go directly onto closing files if the tabPtr is RNIL (as we're in recovery)
---
 ndb/src/kernel/blocks/backup/Backup.cpp | 64 ++++++++++++++++---------
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/ndb/src/kernel/blocks/backup/Backup.cpp b/ndb/src/kernel/blocks/backup/Backup.cpp
index 43c1de5e2b3..10318e5f52d 100644
--- a/ndb/src/kernel/blocks/backup/Backup.cpp
+++ b/ndb/src/kernel/blocks/backup/Backup.cpp
@@ -274,36 +274,48 @@ Backup::execCONTINUEB(Signal* signal)
 
     BackupRecordPtr ptr;
     c_backupPool.getPtr(ptr, ptr_I);
-    TablePtr tabPtr;
-    ptr.p->tables.getPtr(tabPtr, tabPtr_I);
-    FragmentPtr fragPtr;
-    tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I);
 
-    BackupFilePtr filePtr;
-    ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
-
-    const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2;
-    Uint32 * dst;
-    if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz))
+    if (tabPtr_I == RNIL)
     {
-      sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4);
+      closeFiles(signal, ptr);
       return;
     }
+    jam();
+    TablePtr tabPtr;
+    ptr.p->tables.getPtr(tabPtr, tabPtr_I);
+    jam();
+    if(tabPtr.p->fragments.getSize())
+    {
+      FragmentPtr fragPtr;
+      tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I);
 
-    BackupFormat::CtlFile::FragmentInfo * fragInfo = 
-      (BackupFormat::CtlFile::FragmentInfo*)dst;
-    fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO);
-    fragInfo->SectionLength = htonl(sz);
-    fragInfo->TableId = htonl(fragPtr.p->tableId);
-    fragInfo->FragmentNo = htonl(fragPtr_I);
-    fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF);
-    fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32);
-    fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF);
-    fragInfo->FilePosHigh = htonl(0 >> 32);
+      BackupFilePtr filePtr;
+      ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
 
-    filePtr.p->operation.dataBuffer.updateWritePtr(sz);
+      const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2;
+      Uint32 * dst;
+      if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz))
+      {
+        sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4);
+        return;
+      }
+
+      BackupFormat::CtlFile::FragmentInfo * fragInfo = 
+        (BackupFormat::CtlFile::FragmentInfo*)dst;
+      fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO);
+      fragInfo->SectionLength = htonl(sz);
+      fragInfo->TableId = htonl(fragPtr.p->tableId);
+      fragInfo->FragmentNo = htonl(fragPtr_I);
+      fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF);
+      fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32);
+      fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF);
+      fragInfo->FilePosHigh = htonl(0 >> 32);
+
+      filePtr.p->operation.dataBuffer.updateWritePtr(sz);
+
+      fragPtr_I++;
+    }
 
-    fragPtr_I++;
     if (fragPtr_I == tabPtr.p->fragments.getSize())
     {
       signal->theData[0] = tabPtr.p->tableId;
@@ -4243,6 +4255,12 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal)
       TablePtr tabPtr;
       ptr.p->tables.first(tabPtr);
 
+      if (tabPtr.i == RNIL)
+      {
+        closeFiles(signal, ptr);
+        return;
+      }
+
       signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO;
       signal->theData[1] = ptr.i;
       signal->theData[2] = tabPtr.i;