From 85fdd106bd4cd9a8e9d9410ceadfad6644d2120d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 19 Feb 2007 20:13:08 +0100 Subject: [PATCH] ndb - bug#26481 fix for killed node during initial node restart ndb/src/kernel/blocks/dbdih/Dbdih.hpp: add flag for initial node restart, to keep track on when to concider it to be "done" ndb/src/kernel/blocks/dbdih/DbdihInit.cpp: add flag for initial node restart, to keep track on when to concider it to be "done" ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: add flag for initial node restart, to keep track on when to concider it to be "done" ndb/src/kernel/blocks/dblqh/DblqhMain.cpp: Check cstartRecReq also when refusing GCP_SAVEREQ ndb/test/ndbapi/testNodeRestart.cpp: add testcase --- ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 1 + ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 1 + ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 22 ++++++++++++ ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 3 +- ndb/test/ndbapi/testNodeRestart.cpp | 43 +++++++++++++++++++++++ 5 files changed, 69 insertions(+), 1 deletion(-) diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index f1b3897c76f..41240a2d620 100644 --- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -1368,6 +1368,7 @@ private: Uint32 csystemnodes; Uint32 currentgcp; Uint32 c_newest_restorable_gci; + Uint32 c_set_initial_start_flag; enum GcpMasterTakeOverState { GMTOS_IDLE = 0, diff --git a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index bb96c4b8831..4e71ecd5fe2 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -75,6 +75,7 @@ void Dbdih::initData() c_blockCommit = false; c_blockCommitNo = 1; cntrlblockref = RNIL; + c_set_initial_start_flag = FALSE; }//Dbdih::initData() void Dbdih::initRecords() diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 0e6fe4714b6..bf71bc56723 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -667,6 +667,12 @@ done: { jam(); memcpy(sysfileData, cdata, sizeof(sysfileData)); + + if (c_set_initial_start_flag) + { + jam(); + Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits); + } } c_copyGCISlave.m_copyReason = reason; @@ -1260,6 +1266,11 @@ void Dbdih::execNDB_STTOR(Signal* signal) // The permission is given by the master node in the alive set. /*-----------------------------------------------------------------------*/ createMutexes(signal, 0); + if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART) + { + jam(); + c_set_initial_start_flag = TRUE; // In sysfile... + } break; case ZNDB_SPH3: @@ -10277,6 +10288,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){ sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal, LcpCompleteRep::SignalLength, JBB); + + /** + * Say that an initial node restart does not need to be redone + * once node has been part of first LCP + */ + if (c_set_initial_start_flag && + c_lcpState.m_participatingLQH.get(getOwnNodeId())) + { + jam(); + c_set_initial_start_flag = FALSE; + } } /*-------------------------------------------------------------------------- */ diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 3d93ce3fc31..ab5deca27b5 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -11673,7 +11673,8 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) return; } - if(getNodeState().getNodeRestartInProgress()){ + if(getNodeState().getNodeRestartInProgress() && cstartRecReq == ZFALSE) + { GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; saveRef->dihPtr = dihPtr; saveRef->nodeId = getOwnNodeId(); diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index e729e8179b5..c8c8ddd88d6 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -1085,6 +1085,46 @@ retry: return NDBT_OK; } +int +runBug26481(NDBT_Context* ctx, NDBT_Step* step) +{ + + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter res; + + int node = res.getRandomNotMasterNodeId(rand()); + ndbout_c("node: %d", node); + if (res.restartOneDbNode(node, true, true, true)) + return NDBT_FAILED; + + if (res.waitNodesNoStart(&node, 1)) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + if (res.dumpStateOneNode(node, val2, 2)) + return NDBT_FAILED; + + if (res.insertErrorInNode(node, 7018)) + return NDBT_FAILED; + + if (res.startNodes(&node, 1)) + return NDBT_FAILED; + + res.waitNodesStartPhase(&node, 1, 3); + + if (res.waitNodesNoStart(&node, 1)) + return NDBT_FAILED; + + res.startNodes(&node, 1); + + if (res.waitClusterStarted()) + return NDBT_FAILED; + + return NDBT_OK; +} + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ @@ -1409,6 +1449,9 @@ TESTCASE("Bug25554", ""){ TESTCASE("Bug26457", ""){ INITIALIZER(runBug26457); } +TESTCASE("Bug26481", ""){ + INITIALIZER(runBug26481); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){