mirror of
https://github.com/MariaDB/server.git
synced 2025-07-05 12:42:17 +03:00
bug#28717, make sure only master updates activeStatus
so that other nodes don't get confused after having received the status from the master and then trying to update it themselves.
ndb/src/kernel/blocks/ERROR_codes.txt: error 1001, delay node_failrep
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp: error 1001, delay node_failrep
ndb/test/ndbapi/testNodeRestart.cpp: testcase
ndb/test/run-test/daily-basic-tests.txt: testcase
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
Next QMGR 1
|
||||
Next NDBCNTR 1000
|
||||
Next NDBCNTR 1002
|
||||
Next NDBFS 2000
|
||||
Next DBACC 3002
|
||||
Next DBTUP 4014
|
||||
@ -487,3 +487,8 @@ Dbdict:
|
||||
6003 Crash in participant @ CreateTabReq::Prepare
|
||||
6004 Crash in participant @ CreateTabReq::Commit
|
||||
6005 Crash in participant @ CreateTabReq::CreateDrop
|
||||
|
||||
Ndbcntr:
|
||||
--------
|
||||
|
||||
1001: Delay sending NODE_FAILREP (to own node), until error is cleared
|
||||
|
@ -4448,12 +4448,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
|
||||
jam();
|
||||
const Uint32 nodeId = failedNodePtr.i;
|
||||
|
||||
if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){
|
||||
if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
|
||||
{
|
||||
/*----------------------------------------------------*/
|
||||
/* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */
|
||||
/* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */
|
||||
/* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */
|
||||
/*----------------------------------------------------*/
|
||||
|
||||
/**
|
||||
* Bug#28717, Only master should do this, as this status is copied
|
||||
* to other nodes
|
||||
*/
|
||||
switch (failedNodePtr.p->activeStatus) {
|
||||
case Sysfile::NS_Active:
|
||||
jam();
|
||||
|
@ -1375,6 +1375,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
|
||||
if (ERROR_INSERTED(1001))
|
||||
{
|
||||
sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100,
|
||||
signal->getLength());
|
||||
return;
|
||||
}
|
||||
|
||||
const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0];
|
||||
NdbNodeBitmask allFailed;
|
||||
allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);
|
||||
|
@ -1045,6 +1045,84 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
runBug28717(NDBT_Context* ctx, NDBT_Step* step)
|
||||
{
|
||||
int result = NDBT_OK;
|
||||
int loops = ctx->getNumLoops();
|
||||
int records = ctx->getNumRecords();
|
||||
Ndb* pNdb = GETNDB(step);
|
||||
NdbRestarter res;
|
||||
|
||||
if (res.getNumDbNodes() < 4)
|
||||
{
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
int master = res.getMasterNodeId();
|
||||
int node0 = res.getRandomNodeOtherNodeGroup(master, rand());
|
||||
int node1 = res.getRandomNodeSameNodeGroup(node0, rand());
|
||||
|
||||
ndbout_c("master: %d node0: %d node1: %d", master, node0, node1);
|
||||
|
||||
if (res.restartOneDbNode(node0, false, true, true))
|
||||
{
|
||||
return NDBT_FAILED;
|
||||
}
|
||||
|
||||
{
|
||||
int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
|
||||
NdbLogEventHandle handle =
|
||||
ndb_mgm_create_logevent_handle(res.handle, filter);
|
||||
|
||||
|
||||
int dump[] = { DumpStateOrd::DihStartLcpImmediately };
|
||||
struct ndb_logevent event;
|
||||
|
||||
for (Uint32 i = 0; i<3; i++)
|
||||
{
|
||||
res.dumpStateOneNode(master, dump, 1);
|
||||
while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
|
||||
event.type != NDB_LE_LocalCheckpointStarted);
|
||||
while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
|
||||
event.type != NDB_LE_LocalCheckpointCompleted);
|
||||
}
|
||||
}
|
||||
|
||||
if (res.waitNodesNoStart(&node0, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
|
||||
|
||||
if (res.dumpStateOneNode(node0, val2, 2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (res.insertErrorInNode(node0, 5010))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (res.insertErrorInNode(node1, 1001))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (res.startNodes(&node0, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
NdbSleep_SecSleep(3);
|
||||
|
||||
if (res.insertErrorInNode(node1, 0))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (res.waitNodesNoStart(&node0, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (res.startNodes(&node0, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (res.waitClusterStarted())
|
||||
return NDBT_FAILED;
|
||||
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
NDBT_TESTSUITE(testNodeRestart);
|
||||
TESTCASE("NoLoad",
|
||||
"Test that one node at a time can be stopped and then restarted "\
|
||||
@ -1366,6 +1444,9 @@ TESTCASE("Bug25364", ""){
|
||||
TESTCASE("Bug25554", ""){
|
||||
INITIALIZER(runBug25554);
|
||||
}
|
||||
TESTCASE("Bug28717", ""){
|
||||
INITIALIZER(runBug28717);
|
||||
}
|
||||
NDBT_TESTSUITE_END(testNodeRestart);
|
||||
|
||||
int main(int argc, const char** argv){
|
||||
|
@ -492,6 +492,10 @@ max-time: 1500
|
||||
cmd: testDict
|
||||
args: -n CreateAndDrop
|
||||
|
||||
max-time: 1000
|
||||
cmd: testNodeRestart
|
||||
args: -n Bug28717 T1
|
||||
|
||||
max-time: 1500
|
||||
cmd: testDict
|
||||
args: -n CreateAndDropAtRandom -l 200 T1
|
||||
|
Reference in New Issue
Block a user