mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
ndb - bug#26457
master failure during master take over

ndb/src/kernel/blocks/ERROR_codes.txt: new error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Make sure to clear NF_XX_LCP if master fails during master take-over
ndb/test/include/NdbRestarter.hpp: Add support for querying next master and node group (for multi node failure testing)
ndb/test/ndbapi/testNodeRestart.cpp: testcase
ndb/test/run-test/daily-basic-tests.txt: testcase
ndb/test/src/NdbRestarter.cpp: Add support for querying next master and node group (for multi node failure testing)
This commit is contained in:
@ -5,7 +5,7 @@ Next DBACC 3002
|
|||||||
Next DBTUP 4014
|
Next DBTUP 4014
|
||||||
Next DBLQH 5043
|
Next DBLQH 5043
|
||||||
Next DBDICT 6007
|
Next DBDICT 6007
|
||||||
Next DBDIH 7178
|
Next DBDIH 7181
|
||||||
Next DBTC 8039
|
Next DBTC 8039
|
||||||
Next CMVMI 9000
|
Next CMVMI 9000
|
||||||
Next BACKUP 10022
|
Next BACKUP 10022
|
||||||
@ -71,6 +71,8 @@ Delay GCP_SAVEREQ by 10 secs
|
|||||||
|
|
||||||
7177: Delay copying of sysfileData in execCOPY_GCIREQ
|
7177: Delay copying of sysfileData in execCOPY_GCIREQ
|
||||||
|
|
||||||
|
7180: Crash master during master-take-over in execMASTER_LCPCONF
|
||||||
|
|
||||||
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
|
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
|
||||||
-----------------------------------------------------------------
|
-----------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -4612,6 +4612,8 @@ void
|
|||||||
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
|
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
|
||||||
jam();
|
jam();
|
||||||
|
|
||||||
|
Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
|
||||||
|
|
||||||
c_lcpMasterTakeOverState.minTableId = ~0;
|
c_lcpMasterTakeOverState.minTableId = ~0;
|
||||||
c_lcpMasterTakeOverState.minFragId = ~0;
|
c_lcpMasterTakeOverState.minFragId = ~0;
|
||||||
c_lcpMasterTakeOverState.failedNodeId = nodeId;
|
c_lcpMasterTakeOverState.failedNodeId = nodeId;
|
||||||
@ -4630,7 +4632,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
|
|||||||
/**
|
/**
|
||||||
* Node failure during master take over...
|
* Node failure during master take over...
|
||||||
*/
|
*/
|
||||||
ndbout_c("Nodefail during master take over");
|
ndbout_c("Nodefail during master take over (old: %d)", oldNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
NodeRecordPtr nodePtr;
|
||||||
|
nodePtr.i = oldNode;
|
||||||
|
if (oldNode > 0 && oldNode < MAX_NDB_NODES)
|
||||||
|
{
|
||||||
|
jam();
|
||||||
|
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
|
||||||
|
if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
|
||||||
|
{
|
||||||
|
jam();
|
||||||
|
checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
|
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
|
||||||
@ -5646,6 +5661,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
|
|||||||
jamEntry();
|
jamEntry();
|
||||||
const BlockReference newMasterBlockref = req->masterRef;
|
const BlockReference newMasterBlockref = req->masterRef;
|
||||||
|
|
||||||
|
if (newMasterBlockref != cmasterdihref)
|
||||||
|
{
|
||||||
|
jam();
|
||||||
|
ndbout_c("resending GSN_MASTER_LCPREQ");
|
||||||
|
sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
|
||||||
|
signal->getLength(), 50);
|
||||||
|
return;
|
||||||
|
}
|
||||||
Uint32 failedNodeId = req->failedNodeId;
|
Uint32 failedNodeId = req->failedNodeId;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -5946,6 +5969,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
|
|||||||
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
|
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
|
||||||
nodePtr.p->lcpStateAtTakeOver = lcpState;
|
nodePtr.p->lcpStateAtTakeOver = lcpState;
|
||||||
|
|
||||||
|
CRASH_INSERTION(7180);
|
||||||
|
|
||||||
#ifdef VM_TRACE
|
#ifdef VM_TRACE
|
||||||
ndbout_c("MASTER_LCPCONF");
|
ndbout_c("MASTER_LCPCONF");
|
||||||
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
|
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
|
||||||
|
@ -62,6 +62,8 @@ public:
|
|||||||
int dumpStateAllNodes(int * _args, int _num_args);
|
int dumpStateAllNodes(int * _args, int _num_args);
|
||||||
|
|
||||||
int getMasterNodeId();
|
int getMasterNodeId();
|
||||||
|
int getNextMasterNodeId(int nodeId);
|
||||||
|
int getNodeGroup(int nodeId);
|
||||||
int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
|
int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
|
||||||
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
|
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
|
||||||
int getRandomNotMasterNodeId(int randomNumber);
|
int getRandomNotMasterNodeId(int randomNumber);
|
||||||
|
@ -1045,6 +1045,45 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
|
|||||||
return NDBT_OK;
|
return NDBT_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
runBug26457(NDBT_Context* ctx, NDBT_Step* step)
|
||||||
|
{
|
||||||
|
NdbRestarter res;
|
||||||
|
if (res.getNumDbNodes() < 4)
|
||||||
|
return NDBT_OK;
|
||||||
|
|
||||||
|
int loops = ctx->getNumLoops();
|
||||||
|
while (loops --)
|
||||||
|
{
|
||||||
|
retry:
|
||||||
|
int master = res.getMasterNodeId();
|
||||||
|
int next = res.getNextMasterNodeId(master);
|
||||||
|
|
||||||
|
ndbout_c("master: %d next: %d", master, next);
|
||||||
|
|
||||||
|
if (res.getNodeGroup(master) == res.getNodeGroup(next))
|
||||||
|
{
|
||||||
|
res.restartOneDbNode(next, false, false, true);
|
||||||
|
if (res.waitClusterStarted())
|
||||||
|
return NDBT_FAILED;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };
|
||||||
|
|
||||||
|
if (res.dumpStateOneNode(next, val2, 2))
|
||||||
|
return NDBT_FAILED;
|
||||||
|
|
||||||
|
if (res.insertErrorInNode(next, 7180))
|
||||||
|
return NDBT_FAILED;
|
||||||
|
|
||||||
|
res.restartOneDbNode(master, false, false, true);
|
||||||
|
if (res.waitClusterStarted())
|
||||||
|
return NDBT_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NDBT_OK;
|
||||||
|
}
|
||||||
|
|
||||||
NDBT_TESTSUITE(testNodeRestart);
|
NDBT_TESTSUITE(testNodeRestart);
|
||||||
TESTCASE("NoLoad",
|
TESTCASE("NoLoad",
|
||||||
@ -1367,6 +1406,9 @@ TESTCASE("Bug25364", ""){
|
|||||||
TESTCASE("Bug25554", ""){
|
TESTCASE("Bug25554", ""){
|
||||||
INITIALIZER(runBug25554);
|
INITIALIZER(runBug25554);
|
||||||
}
|
}
|
||||||
|
TESTCASE("Bug26457", ""){
|
||||||
|
INITIALIZER(runBug26457);
|
||||||
|
}
|
||||||
NDBT_TESTSUITE_END(testNodeRestart);
|
NDBT_TESTSUITE_END(testNodeRestart);
|
||||||
|
|
||||||
int main(int argc, const char** argv){
|
int main(int argc, const char** argv){
|
||||||
|
@ -477,6 +477,10 @@ max-time: 1000
|
|||||||
cmd: testNodeRestart
|
cmd: testNodeRestart
|
||||||
args: -n Bug25554 T1
|
args: -n Bug25554 T1
|
||||||
|
|
||||||
|
max-time: 1000
|
||||||
|
cmd: testNodeRestart
|
||||||
|
args: -n Bug26457 T1
|
||||||
|
|
||||||
# OLD FLEX
|
# OLD FLEX
|
||||||
max-time: 500
|
max-time: 500
|
||||||
cmd: flexBench
|
cmd: flexBench
|
||||||
|
@ -128,6 +128,68 @@ NdbRestarter::getMasterNodeId(){
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
NdbRestarter::getNodeGroup(int nodeId){
|
||||||
|
if (!isConnected())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (getStatus() != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
for(size_t i = 0; i < ndbNodes.size(); i++)
|
||||||
|
{
|
||||||
|
if(ndbNodes[i].node_id == nodeId)
|
||||||
|
{
|
||||||
|
return ndbNodes[i].node_group;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
NdbRestarter::getNextMasterNodeId(int nodeId){
|
||||||
|
if (!isConnected())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (getStatus() != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
size_t i;
|
||||||
|
for(i = 0; i < ndbNodes.size(); i++)
|
||||||
|
{
|
||||||
|
if(ndbNodes[i].node_id == nodeId)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(i < ndbNodes.size());
|
||||||
|
if (i == ndbNodes.size())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
int dynid = ndbNodes[i].dynamic_id;
|
||||||
|
int minid = dynid;
|
||||||
|
for (i = 0; i<ndbNodes.size(); i++)
|
||||||
|
if (ndbNodes[i].dynamic_id > minid)
|
||||||
|
minid = ndbNodes[i].dynamic_id;
|
||||||
|
|
||||||
|
for (i = 0; i<ndbNodes.size(); i++)
|
||||||
|
if (ndbNodes[i].dynamic_id > dynid &&
|
||||||
|
ndbNodes[i].dynamic_id < minid)
|
||||||
|
{
|
||||||
|
minid = ndbNodes[i].dynamic_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minid != ~0)
|
||||||
|
{
|
||||||
|
for (i = 0; i<ndbNodes.size(); i++)
|
||||||
|
if (ndbNodes[i].dynamic_id == minid)
|
||||||
|
return ndbNodes[i].node_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
return getMasterNodeId();
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
NdbRestarter::getRandomNotMasterNodeId(int rand){
|
NdbRestarter::getRandomNotMasterNodeId(int rand){
|
||||||
int master = getMasterNodeId();
|
int master = getMasterNodeId();
|
||||||
|
Reference in New Issue
Block a user