From 40f44b48b0f44f9eb7f13dc7c2e28e56ad805634 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Jun 2006 16:16:07 +0200 Subject: [PATCH 1/5] ndb - bug#18781 lock DICT during node restart ndb/src/kernel/main.cpp: signal log from start (#if 0-ed) ndb/test/ndbapi/testDict.cpp: test NF/NR + dict ops ndb/src/kernel/vm/DLFifoList.hpp: add hasPrev ndb/src/kernel/vm/pc.hpp: ERROR_INSERTED_CLEAR(x) test and clear if set ndb/src/common/debugger/SignalLoggerManager.cpp: block no fix ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: spelling ndb/include/kernel/GlobalSignalNumbers.h: locking of master DICT against schema ops, used by slave DIH under NR ndb/include/kernel/signaldata/AlterTable.hpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/include/kernel/signaldata/CreateTable.hpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/include/kernel/signaldata/DictLock.hpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/include/kernel/signaldata/DropTable.hpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/common/debugger/signaldata/SignalNames.cpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/kernel/blocks/ERROR_codes.txt: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/kernel/blocks/dbdict/Dbdict.cpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/kernel/blocks/dbdict/Dbdict.hpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/kernel/blocks/dbdih/Dbdih.hpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/kernel/blocks/dbdih/DbdihInit.cpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: locking of master DICT against schema ops, used by slave DIH under NR ndb/src/ndbapi/ndberror.c: locking of master DICT against schema ops, used by slave DIH under NR --- ndb/include/kernel/GlobalSignalNumbers.h | 10 +- ndb/include/kernel/signaldata/AlterTable.hpp | 1 + ndb/include/kernel/signaldata/CreateTable.hpp | 1 + ndb/include/kernel/signaldata/DictLock.hpp | 76 +++++ ndb/include/kernel/signaldata/DropTable.hpp | 1 + .../common/debugger/SignalLoggerManager.cpp | 2 +- .../debugger/signaldata/SignalNames.cpp | 6 + ndb/src/kernel/blocks/ERROR_codes.txt | 4 +- ndb/src/kernel/blocks/dbdict/Dbdict.cpp | 245 +++++++++++++- ndb/src/kernel/blocks/dbdict/Dbdict.hpp | 97 +++++- ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 28 ++ ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 6 + ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 133 +++++++- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 4 +- ndb/src/kernel/main.cpp | 4 + ndb/src/kernel/vm/DLFifoList.hpp | 14 + ndb/src/kernel/vm/pc.hpp | 2 + ndb/src/ndbapi/ndberror.c | 1 + ndb/test/ndbapi/testDict.cpp | 304 ++++++++++++++++++ 19 files changed, 924 insertions(+), 15 deletions(-) create mode 100644 ndb/include/kernel/signaldata/DictLock.hpp diff --git a/ndb/include/kernel/GlobalSignalNumbers.h b/ndb/include/kernel/GlobalSignalNumbers.h index 98b6ce7d949..d60f7a2c582 100644 --- a/ndb/include/kernel/GlobalSignalNumbers.h +++ b/ndb/include/kernel/GlobalSignalNumbers.h @@ -507,16 +507,12 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_TEST_ORD 407 #define GSN_TESTSIG 408 #define GSN_TIME_SIGNAL 409 -/* 410 unused */ -/* 411 unused */ -/* 412 unused */ #define GSN_TUP_ABORTREQ 414 #define GSN_TUP_ADD_ATTCONF 415 #define GSN_TUP_ADD_ATTRREF 416 #define GSN_TUP_ADD_ATTRREQ 417 #define GSN_TUP_ATTRINFO 418 #define GSN_TUP_COMMITREQ 419 -/* 420 unused */ #define GSN_TUP_LCPCONF 421 #define GSN_TUP_LCPREF 422 #define GSN_TUP_LCPREQ 423 @@ -938,4 +934,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_ACC_LOCKREQ 711 #define GSN_READ_PSUEDO_REQ 712 +/* DICT LOCK signals */ +#define GSN_DICT_LOCK_REQ 410 +#define GSN_DICT_LOCK_CONF 411 +#define GSN_DICT_LOCK_REF 412 +#define GSN_DICT_UNLOCK_ORD 420 + #endif diff --git a/ndb/include/kernel/signaldata/AlterTable.hpp b/ndb/include/kernel/signaldata/AlterTable.hpp index 16c9eb204c9..f5006c27fdb 100644 --- a/ndb/include/kernel/signaldata/AlterTable.hpp +++ b/ndb/include/kernel/signaldata/AlterTable.hpp @@ -114,6 +114,7 @@ public: InvalidTableVersion = 241, DropInProgress = 283, Busy = 701, + BusyWithNR = 711, NotMaster = 702, InvalidFormat = 703, AttributeNameTooLong = 704, diff --git a/ndb/include/kernel/signaldata/CreateTable.hpp b/ndb/include/kernel/signaldata/CreateTable.hpp index 481b323fdb0..7d3189cc126 100644 --- a/ndb/include/kernel/signaldata/CreateTable.hpp +++ b/ndb/include/kernel/signaldata/CreateTable.hpp @@ -77,6 +77,7 @@ public: enum ErrorCode { NoError = 0, Busy = 701, + BusyWithNR = 711, NotMaster = 702, InvalidFormat = 703, AttributeNameTooLong = 704, diff --git a/ndb/include/kernel/signaldata/DictLock.hpp b/ndb/include/kernel/signaldata/DictLock.hpp new file mode 100644 index 00000000000..c8f919f65a8 --- /dev/null +++ b/ndb/include/kernel/signaldata/DictLock.hpp @@ -0,0 +1,76 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef DICT_LOCK_HPP +#define DICT_LOCK_HPP + +#include "SignalData.hpp" + +// see comments in Dbdict.hpp + +class DictLockReq { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 3 ); + enum LockType { + NoLock = 0, + NodeRestartLock = 1 + }; +private: + Uint32 userPtr; + Uint32 lockType; + Uint32 userRef; +}; + +class DictLockConf { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 3 ); +private: + Uint32 userPtr; + Uint32 lockType; + Uint32 lockPtr; +}; + +class DictLockRef { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 3 ); + enum ErrorCode { + NotMaster = 1, + InvalidLockType = 2, + TooManyRequests = 3 + }; +private: + Uint32 userPtr; + Uint32 lockType; + Uint32 errorCode; +}; + +class DictUnlockOrd { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 2 ); +private: + Uint32 lockPtr; + Uint32 lockType; +}; + +#endif diff --git a/ndb/include/kernel/signaldata/DropTable.hpp b/ndb/include/kernel/signaldata/DropTable.hpp index cae6aff8754..e762446d2b8 100644 --- a/ndb/include/kernel/signaldata/DropTable.hpp +++ b/ndb/include/kernel/signaldata/DropTable.hpp @@ -53,6 +53,7 @@ public: enum ErrorCode { Busy = 701, + BusyWithNR = 711, NotMaster = 702, NoSuchTable = 709, InvalidTableVersion = 241, diff --git a/ndb/src/common/debugger/SignalLoggerManager.cpp b/ndb/src/common/debugger/SignalLoggerManager.cpp index d8710d2058f..67e13dc805a 100644 --- a/ndb/src/common/debugger/SignalLoggerManager.cpp +++ b/ndb/src/common/debugger/SignalLoggerManager.cpp @@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode, const char * params) } else { for (int i = 0; i < count; ++i){ BlockNumber number = getBlockNo(blocks[i]); - cnt += log(SLM_ON, number-MIN_BLOCK_NO, logMode); + cnt += log(SLM_ON, number, logMode); } } for(int i = 0; ireq.userRef), lockPtr.p->lt->text); +} + +void +Dbdict::execDICT_LOCK_REQ(Signal* signal) +{ + jamEntry(); + const DictLockReq* req = (const DictLockReq*)&signal->theData[0]; + + if (getOwnNodeId() != c_masterNodeId) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::NotMaster); + return; + } + + const DictLockType* lt = getDictLockType(req->lockType); + if (lt == NULL) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::InvalidLockType); + return; + } + + DictLockPtr lockPtr; + if (! c_dictLockQueue.seize(lockPtr)) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooManyRequests); + return; + } + + lockPtr.p->req = *req; + lockPtr.p->locked = false; + lockPtr.p->lt = lt; + + checkDictLockQueue(signal); + + if (! lockPtr.p->locked) + sendDictLockInfoEvent(lockPtr, "lock request by node"); +} + +void +Dbdict::checkDictLockQueue(Signal* signal) +{ + DictLockPtr lockPtr; + + do { + if (! c_dictLockQueue.first(lockPtr)) { + jam(); + setDictLockPoll(signal, false); + return; + } + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + break; + } + + if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) { + jam(); + break; + } + + ndbrequire(c_blockState == BS_IDLE); + lockPtr.p->locked = true; + c_blockState = lockPtr.p->lt->blockState; + sendDictLockConf(signal, lockPtr); + + sendDictLockInfoEvent(lockPtr, "locked by node"); + } while (0); + + // poll while first request is open + // this routine is called again when it is removed for any reason + + bool on = ! lockPtr.p->locked; + setDictLockPoll(signal, on); +} + +void +Dbdict::execDICT_UNLOCK_ORD(Signal* signal) +{ + jamEntry(); + const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0]; + + DictLockPtr lockPtr; + c_dictLockQueue.getPtr(lockPtr, ord->lockPtr); + ndbrequire(lockPtr.p->lt->lockType == ord->lockType); + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + sendDictLockInfoEvent(lockPtr, "unlocked by node"); + } else { + sendDictLockInfoEvent(lockPtr, "lock request removed by node"); + } + + c_dictLockQueue.release(lockPtr); + + checkDictLockQueue(signal); +} + +void +Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr) +{ + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + const DictLockReq& req = lockPtr.p->req; + + conf->userPtr = req.userPtr; + conf->lockType = req.lockType; + conf->lockPtr = lockPtr.i; + + sendSignal(req.userRef, GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); +} + +void +Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode) +{ + DictLockRef* ref = (DictLockRef*)&signal->theData[0]; + + ref->userPtr = req.userPtr; + ref->lockType = req.lockType; + ref->errorCode = errorCode; + + sendSignal(req.userRef, GSN_DICT_LOCK_REF, signal, + DictLockRef::SignalLength, JBB); +} + +// control polling + +void +Dbdict::setDictLockPoll(Signal* signal, bool on) +{ + if (on) { + jam(); + signal->theData[0] = ZDICT_LOCK_POLL; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1); + } + + if (c_dictLockPoll != on) { + jam(); +#ifdef VM_TRACE + infoEvent("DICT: lock polling %s", on ? "On" : "Off"); +#endif + c_dictLockPoll = on; + } +} + +// NF handling + +void +Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + + while (loopPtr.i != RNIL) { + jam(); + DictLockPtr lockPtr = loopPtr; + c_dictLockQueue.next(loopPtr); + + Uint32 nodeId = refToNode(lockPtr.p->req.userRef); + + if (NodeBitmask::get(theFailedNodes, nodeId)) { + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + + sendDictLockInfoEvent(lockPtr, "remove lock by failed node"); + } else { + sendDictLockInfoEvent(lockPtr, "remove lock request by failed node"); + } + + c_dictLockQueue.release(lockPtr); + } + } + + checkDictLockQueue(signal); +} + + /* **************************************************************** */ /* ---------------------------------------------------------------- */ /* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */ diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index 6b78fb86534..fbad67d8822 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,7 @@ #include #include #include +#include #include "SchemaFile.hpp" #include #include @@ -63,6 +65,7 @@ /*--------------------------------------------------------------*/ #define ZPACK_TABLE_INTO_PAGES 0 #define ZSEND_GET_TAB_RESPONSE 3 +#define ZDICT_LOCK_POLL 4 /*--------------------------------------------------------------*/ @@ -587,6 +590,9 @@ private: void execALTER_TAB_CONF(Signal* signal); bool check_ndb_versions() const; + void execDICT_LOCK_REQ(Signal* signal); + void execDICT_UNLOCK_ORD(Signal* signal); + /* * 2.4 COMMON STORED VARIABLES */ @@ -817,12 +823,43 @@ private: // State variables /* ----------------------------------------------------------------------- */ +#ifndef ndb_dbdict_log_block_state enum BlockState { BS_IDLE = 0, BS_CREATE_TAB = 1, BS_BUSY = 2, - BS_NODE_FAILURE = 3 + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 }; +#else // quick hack to log changes + enum { + BS_IDLE = 0, + BS_CREATE_TAB = 1, + BS_BUSY = 2, + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 + }; + struct BlockState; + friend struct BlockState; + struct BlockState { + BlockState() : + m_value(BS_IDLE) { + } + BlockState(int value) : + m_value(value) { + } + operator int() const { + return m_value; + } + BlockState& operator=(const BlockState& bs) { + Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT); + dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value); + m_value = bs.m_value; + return *this; + } + int m_value; + }; +#endif BlockState c_blockState; struct PackTable { @@ -1722,6 +1759,64 @@ private: // Unique key for operation XXX move to some system table Uint32 c_opRecordSequence; + /* + * Master DICT can be locked in 2 mutually exclusive ways: + * + * 1) for schema ops, via operation records + * 2) against schema ops, via a lock queue + * + * Current use of 2) is by a starting node, to prevent schema ops + * until started. The ops are refused (BlockState != BS_IDLE), + * not queued. + * + * Master failure is not handled, in node start case the starting + * node will crash too anyway. Use lock table in future.. + * + * The lock queue is "serial" but other behaviour is possible + * by checking lock types e.g. to allow parallel node starts. + * + * Checking release of last op record is not convenient with + * current structure (5.0). Instead we poll via continueB. + * + * XXX only table ops check BlockState + */ + + struct DictLockType { + DictLockReq::LockType lockType; + BlockState blockState; + const char* text; + }; + + struct DictLockRecord { + DictLockReq req; + const DictLockType* lt; + bool locked; + union { + Uint32 nextPool; + Uint32 nextList; + }; + Uint32 prevList; + }; + + typedef Ptr DictLockPtr; + ArrayPool c_dictLockPool; + DLFifoList c_dictLockQueue; + bool c_dictLockPoll; + + static const DictLockType* getDictLockType(Uint32 lockType); + void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); + + void checkDictLockQueue(Signal* signal); + void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); + void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); + + // control polling i.e. continueB loop + void setDictLockPoll(Signal* signal, bool on); + + // NF handling + void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes); + + // Statement blocks /* ------------------------------------------------------------ */ diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index 78acf1ffd19..f4a33df9805 100644 --- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -718,6 +718,9 @@ private: void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr); void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId); + void execDICT_LOCK_CONF(Signal* signal); + void execDICT_LOCK_REF(Signal* signal); + // Statement blocks //------------------------------------ // Methods that send signals @@ -935,6 +938,7 @@ private: void initialStartCompletedLab(Signal *); void allNodesLcpCompletedLab(Signal *); void nodeRestartPh2Lab(Signal *); + void nodeRestartPh2Lab2(Signal *); void initGciFilesLab(Signal *); void dictStartConfLab(Signal *); void nodeDictStartConfLab(Signal *); @@ -1594,6 +1598,30 @@ private: * Reply from nodeId */ void startInfoReply(Signal *, Uint32 nodeId); + + /* + * Lock master DICT. Only current use is by starting node + * during NR. A pool of slave records is convenient anyway. + */ + struct DictLockSlaveRecord { + Uint32 lockPtr; + Uint32 lockType; + bool locked; + Callback callback; + Uint32 nextPool; + }; + + typedef Ptr DictLockSlavePtr; + ArrayPool c_dictLockSlavePool; + + // slave + void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c); + void recvDictLockConf(Signal* signal); + void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI); + + // NR + Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR + void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret); }; #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32) diff --git a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index cd987048577..2b878034258 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -66,6 +66,9 @@ void Dbdih::initData() waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE); waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE); + c_dictLockSlavePool.setSize(1); // assert single usage + c_dictLockSlavePtrI_nodeRestart = RNIL; + cgcpOrderBlocked = 0; c_lcpState.ctcCounter = 0; cwaitLcpSr = false; @@ -264,6 +267,9 @@ Dbdih::Dbdih(const class Configuration & config): addRecSignal(GSN_CREATE_FRAGMENTATION_REQ, &Dbdih::execCREATE_FRAGMENTATION_REQ); + addRecSignal(GSN_DICT_LOCK_CONF, &Dbdih::execDICT_LOCK_CONF); + addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF); + apiConnectRecord = 0; connectRecord = 0; fileRecord = 0; diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 3ebad7f0cd2..c37461a1f65 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -544,7 +545,7 @@ void Dbdih::execCONTINUEB(Signal* signal) break; case DihContinueB::ZSTART_PERMREQ_AGAIN: jam(); - nodeRestartPh2Lab(signal); + nodeRestartPh2Lab2(signal); return; break; case DihContinueB::SwitchReplica: @@ -1284,6 +1285,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) case NodeState::ST_INITIAL_NODE_RESTART: case NodeState::ST_NODE_RESTART: jam(); + /*********************************************************************** * When starting nodes while system is operational we must be controlled * by the master since only one node restart is allowed at a time. @@ -1294,7 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) req->startingRef = reference(); req->startingVersion = 0; // Obsolete sendSignal(cmasterdihref, GSN_START_MEREQ, signal, - StartMeReq::SignalLength, JBB); + StartMeReq::SignalLength, JBB); return; } ndbrequire(false); @@ -1354,6 +1356,24 @@ void Dbdih::execNDB_STTOR(Signal* signal) } ndbrequire(false); break; + case ZNDB_SPH7: + jam(); + switch (typestart) { + case NodeState::ST_INITIAL_START: + case NodeState::ST_SYSTEM_RESTART: + jam(); + ndbsttorry10Lab(signal, __LINE__); + return; + case NodeState::ST_NODE_RESTART: + case NodeState::ST_INITIAL_NODE_RESTART: + jam(); + sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart); + c_dictLockSlavePtrI_nodeRestart = RNIL; + ndbsttorry10Lab(signal, __LINE__); + return; + } + ndbrequire(false); + break; default: jam(); ndbsttorry10Lab(signal, __LINE__); @@ -1563,6 +1583,31 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) /* START NODE LOGIC FOR NODE RESTART */ /*---------------------------------------------------------------------------*/ void Dbdih::nodeRestartPh2Lab(Signal* signal) +{ + /* + * Lock master DICT to avoid metadata operations during INR/NR. + * Done just before START_PERMREQ. + * + * It would be more elegant to do this just before START_MEREQ. + * The problem is, on INR we end up in massive invalidateNodeLCP + * which is not fully protected against metadata ops. + */ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + + Uint32 lockType = DictLockReq::NodeRestartLock; + Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 }; + sendDictLockReq(signal, lockType, c); +} + +void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret) +{ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + c_dictLockSlavePtrI_nodeRestart = data; + + nodeRestartPh2Lab2(signal); +} + +void Dbdih::nodeRestartPh2Lab2(Signal* signal) { /*------------------------------------------------------------------------*/ // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY @@ -1574,7 +1619,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) req->nodeId = cownNodeId; req->startType = cstarttype; sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB); -}//Dbdih::nodeRestartPh2Lab() +} void Dbdih::execSTART_PERMCONF(Signal* signal) { @@ -1696,12 +1741,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) const BlockReference retRef = req->blockRef; const Uint32 nodeId = req->nodeId; const Uint32 typeStart = req->startType; - CRASH_INSERTION(7122); ndbrequire(isMaster()); ndbrequire(refToNode(retRef) == nodeId); if ((c_nodeStartMaster.activeState) || - (c_nodeStartMaster.wait != ZFALSE)) { + (c_nodeStartMaster.wait != ZFALSE) || + ERROR_INSERTED_CLEAR(7174)) { jam(); signal->theData[0] = nodeId; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; @@ -10448,6 +10493,10 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_waiting); break; + case GCP_READY: // shut up lint + case GCP_PREPARE_SENT: + case GCP_COMMIT_SENT: + break; } ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", @@ -14639,3 +14688,77 @@ Dbdih::NodeRecord::NodeRecord(){ copyCompleted = false; allowNodeStart = true; } + +// DICT lock slave + +void +Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c) +{ + DictLockReq* req = (DictLockReq*)&signal->theData[0]; + DictLockSlavePtr lockPtr; + + c_dictLockSlavePool.seize(lockPtr); + ndbrequire(lockPtr.i != RNIL); + + req->userPtr = lockPtr.i; + req->lockType = lockType; + req->userRef = reference(); + + lockPtr.p->lockPtr = RNIL; + lockPtr.p->lockType = lockType; + lockPtr.p->locked = false; + lockPtr.p->callback = c; + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal, + DictLockReq::SignalLength, JBB); +} + +void +Dbdih::execDICT_LOCK_CONF(Signal* signal) +{ + jamEntry(); + recvDictLockConf(signal); +} + +void +Dbdih::execDICT_LOCK_REF(Signal* signal) +{ + jamEntry(); + ndbrequire(false); +} + +void +Dbdih::recvDictLockConf(Signal* signal) +{ + const DictLockConf* conf = (const DictLockConf*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr); + + lockPtr.p->lockPtr = conf->lockPtr; + ndbrequire(lockPtr.p->lockType == conf->lockType); + ndbrequire(lockPtr.p->locked == false); + lockPtr.p->locked = true; + + lockPtr.p->callback.m_callbackData = lockPtr.i; + execute(signal, lockPtr.p->callback, 0); +} + +void +Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI) +{ + DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI); + + ord->lockPtr = lockPtr.p->lockPtr; + ord->lockType = lockPtr.p->lockType; + + c_dictLockSlavePool.release(lockPtr); + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, + DictUnlockOrd::SignalLength, JBB); +} diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 9a7256b4a55..b9bf522f7c8 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) { jam(); CRASH_INSERTION(932); - BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } @@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ndbrequire(false); case ZAPI_INACTIVE: { - BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } diff --git a/ndb/src/kernel/main.cpp b/ndb/src/kernel/main.cpp index 7c1763485ce..649ae7cae3f 100644 --- a/ndb/src/kernel/main.cpp +++ b/ndb/src/kernel/main.cpp @@ -420,6 +420,10 @@ int main(int argc, char** argv) FILE * signalLog = fopen(buf, "a"); globalSignalLoggers.setOwnNodeId(globalData.ownId); globalSignalLoggers.setOutputStream(signalLog); +#if 0 // to log startup + globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH"); + globalData.testOn = 1; +#endif #endif catchsigs(false); diff --git a/ndb/src/kernel/vm/DLFifoList.hpp b/ndb/src/kernel/vm/DLFifoList.hpp index b139ade831d..963ab007b65 100644 --- a/ndb/src/kernel/vm/DLFifoList.hpp +++ b/ndb/src/kernel/vm/DLFifoList.hpp @@ -115,6 +115,13 @@ public: */ bool hasNext(const Ptr &) const; + /** + * Check if prev exists i.e. this is not first + * + * NOTE ptr must be both p & i + */ + bool hasPrev(const Ptr &) const; + Uint32 noOfElements() const { Uint32 c = 0; Uint32 i = head.firstItem; @@ -357,4 +364,11 @@ DLFifoList::hasNext(const Ptr & p) const { return p.p->nextList != RNIL; } +template +inline +bool +DLFifoList::hasPrev(const Ptr & p) const { + return p.p->prevList != RNIL; +} + #endif diff --git a/ndb/src/kernel/vm/pc.hpp b/ndb/src/kernel/vm/pc.hpp index 6aeda59224f..95839c48e4e 100644 --- a/ndb/src/kernel/vm/pc.hpp +++ b/ndb/src/kernel/vm/pc.hpp @@ -125,11 +125,13 @@ #ifdef ERROR_INSERT #define ERROR_INSERT_VARIABLE UintR cerrorInsert #define ERROR_INSERTED(x) (cerrorInsert == (x)) +#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? (cerrorInsert = 0, true) : false) #define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x #define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0 #else #define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used #define ERROR_INSERTED(x) false +#define ERROR_INSERTED_CLEAR(x) false #define SET_ERROR_INSERT_VALUE(x) #define CLEAR_ERROR_INSERT_VALUE #endif diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 5ca8ad7be60..657c57021bb 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -325,6 +325,7 @@ ErrorBundle ErrorCodes[] = { * SchemaError */ { 701, SE, "System busy with other schema operation" }, + { 711, SE, "System busy with node restart, schema operations not allowed" }, { 703, SE, "Invalid table format" }, { 704, SE, "Attribute name too long" }, { 705, SE, "Table name too long" }, diff --git a/ndb/test/ndbapi/testDict.cpp b/ndb/test/ndbapi/testDict.cpp index 710a47bf3dc..2cfb78f143e 100644 --- a/ndb/test/ndbapi/testDict.cpp +++ b/ndb/test/ndbapi/testDict.cpp @@ -1551,6 +1551,282 @@ end: return result; } +// NFNR + +// Restarter controls dict ops : 1-run 2-pause 3-stop +// synced by polling... + +static bool +send_dict_ops_cmd(NDBT_Context* ctx, Uint32 cmd) +{ + ctx->setProperty("DictOps_CMD", cmd); + while (1) { + if (ctx->isTestStopped()) + return false; + if (ctx->getProperty("DictOps_ACK") == cmd) + break; + NdbSleep_MilliSleep(100); + } + return true; +} + +static bool +recv_dict_ops_run(NDBT_Context* ctx) +{ + while (1) { + if (ctx->isTestStopped()) + return false; + Uint32 cmd = ctx->getProperty("DictOps_CMD"); + ctx->setProperty("DictOps_ACK", cmd); + if (cmd == 1) + break; + if (cmd == 3) + return false; + NdbSleep_MilliSleep(100); + } + return true; +} + +int +runRestarts(NDBT_Context* ctx, NDBT_Step* step) +{ + static int err_master[] = { // non-crashing + 0, + 7174 // send one fake START_PERMREF + }; + static int err_node[] = { + 0, + 7121, // crash on START_PERMCONF + 7130 // crash on START_MECONF + }; + const uint err_master_cnt = sizeof(err_master)/sizeof(err_master[0]); + const uint err_node_cnt = sizeof(err_node)/sizeof(err_node[0]); + + myRandom48Init(NdbTick_CurrentMillisecond()); + NdbRestarter restarter; + int result = NDBT_OK; + const int loops = ctx->getNumLoops(); + + for (int l = 0; l < loops && result == NDBT_OK; l++) { + g_info << "1: === loop " << l << " ===" << endl; + + // assuming 2-way replicated + + int numnodes = restarter.getNumDbNodes(); + CHECK(numnodes >= 1); + if (numnodes == 1) + break; + + int masterNodeId = restarter.getMasterNodeId(); + CHECK(masterNodeId != -1); + + // for more complex cases need more restarter support methods + + int nodeIdList[2] = { 0, 0 }; + int nodeIdCnt = 0; + + if (numnodes >= 2) { + int rand = myRandom48(numnodes); + int nodeId = restarter.getRandomNotMasterNodeId(rand); + CHECK(nodeId != -1); + nodeIdList[nodeIdCnt++] = nodeId; + } + + if (numnodes >= 4) { + int rand = myRandom48(numnodes); + int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand); + CHECK(nodeId != -1); + if (nodeId != masterNodeId) + nodeIdList[nodeIdCnt++] = nodeId; + } + + g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl; + + const unsigned maxsleep = 2000; //ms + + bool NF_ops = ctx->getProperty("Restart_NF_ops"); + uint NF_type = ctx->getProperty("Restart_NF_type"); + bool NR_ops = ctx->getProperty("Restart_NR_ops"); + bool NR_error = ctx->getProperty("Restart_NR_error"); + + g_info << "1: " << (NF_ops ? "run" : "pause") << " dict ops" << endl; + if (! send_dict_ops_cmd(ctx, NF_ops ? 1 : 2)) + break; + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + { + int i = 0; + while (i < nodeIdCnt) { + int nodeId = nodeIdList[i++]; + + bool nostart = true; + bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2); + bool initial = myRandom48(2); + + char flags[40]; + strcpy(flags, "flags: nostart"); + if (abort) + strcat(flags, ",abort"); + if (initial) + strcat(flags, ",initial"); + + g_info << "1: restart " << nodeId << " " << flags << endl; + CHECK(restarter.restartOneDbNode(nodeId, initial, nostart, abort) == 0); + } + } + + g_info << "1: wait for nostart" << endl; + CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt) == 0); + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl; + if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2)) + break; + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: start nodes" << endl; + CHECK(restarter.startNodes(nodeIdList, nodeIdCnt) == 0); + + if (NR_error) { + { + int rand = myRandom48(err_master_cnt); + int err = err_master[rand]; + if (err != 0) { + g_info << "1: insert master error " << err << endl; + CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0); + } + } + + // limitation: cannot have 2 node restarts and crash_insert + // one node may die for real (NF during startup) + + int i = 0; + while (i < nodeIdCnt && nodeIdCnt == 1) { + int nodeId = nodeIdList[i++]; + + int rand = myRandom48(err_node_cnt); + int err = err_node[rand]; + if (err != 0) { + g_info << "1: insert node " << nodeId << " error " << err << endl; + CHECK(restarter.insertErrorInNode(nodeId, err) == 0); + } + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: wait cluster started" << endl; + CHECK(restarter.waitClusterStarted() == 0); + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: restart done" << endl; + } + + g_info << "1: stop dict ops" << endl; + send_dict_ops_cmd(ctx, 3); + + return result; +} + +int +runDictOps(NDBT_Context* ctx, NDBT_Step* step) +{ + myRandom48Init(NdbTick_CurrentMillisecond()); + int result = NDBT_OK; + + for (int l = 0; result == NDBT_OK; l++) { + if (! recv_dict_ops_run(ctx)) + break; + + g_info << "2: === loop " << l << " ===" << endl; + + Ndb* pNdb = GETNDB(step); + NdbDictionary::Dictionary* pDic = pNdb->getDictionary(); + const NdbDictionary::Table* pTab = ctx->getTab(); + const char* tabName = pTab->getName(); + + const unsigned long maxsleep = 100; //ms + + g_info << "2: create table" << endl; + { + uint count = 0; + try_create: + count++; + if (pDic->createTable(*pTab) != 0) { + const NdbError err = pDic->getNdbError(); + if (count == 1) + g_err << "2: " << tabName << ": create failed: " << err << endl; + if (err.code != 711) { + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + goto try_create; + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: verify create" << endl; + const NdbDictionary::Table* pTab2 = pDic->getTable(tabName); + if (pTab2 == NULL) { + const NdbError err = pDic->getNdbError(); + g_err << "2: " << tabName << ": verify create: " << err << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + // replace by the Retrieved table + pTab = pTab2; + + int records = myRandom48(ctx->getNumRecords()); + g_info << "2: load " << records << " records" << endl; + HugoTransactions hugoTrans(*pTab); + if (hugoTrans.loadTable(pNdb, records) != 0) { + // XXX get error code from hugo + g_err << "2: " << tabName << ": load failed" << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: drop" << endl; + { + uint count = 0; + try_drop: + count++; + if (pDic->dropTable(tabName) != 0) { + const NdbError err = pDic->getNdbError(); + if (count == 1) + g_err << "2: " << tabName << ": drop failed: " << err << endl; + if (err.code != 711) { + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + goto try_drop; + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: verify drop" << endl; + const NdbDictionary::Table* pTab3 = pDic->getTable(tabName); + if (pTab3 != NULL) { + g_err << "2: " << tabName << ": verify drop: table exists" << endl; + result = NDBT_FAILED; + break; + } + if (pDic->getNdbError().code != 709) { + const NdbError err = pDic->getNdbError(); + g_err << "2: " << tabName << ": verify drop: " << err << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + } + + return result; +} + NDBT_TESTSUITE(testDict); TESTCASE("CreateAndDrop", "Try to create and drop the table loop number of times\n"){ @@ -1655,6 +1931,34 @@ TESTCASE("FailAddFragment", "Fail add fragment or attribute in ACC or TUP or TUX\n"){ INITIALIZER(runFailAddFragment); } +TESTCASE("Restart_NF1", + "DICT ops during node graceful shutdown (not master)"){ + TC_PROPERTY("Restart_NF_ops", 1); + TC_PROPERTY("Restart_NF_type", 1); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NF2", + "DICT ops during node shutdown abort (not master)"){ + TC_PROPERTY("Restart_NF_ops", 1); + TC_PROPERTY("Restart_NF_type", 2); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NR1", + "DICT ops during node startup (not master)"){ + TC_PROPERTY("Restart_NR_ops", 1); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NR2", + "DICT ops during node startup with crash inserts (not master)"){ + TC_PROPERTY("Restart_NR_ops", 1); + TC_PROPERTY("Restart_NR_error", 1); + STEP(runRestarts); + STEP(runDictOps); +} + NDBT_TESTSUITE_END(testDict); int main(int argc, const char** argv){ From a182963cc04b7ff0ca27abe33a075ac06e4e0c2e Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 11 Jun 2006 20:46:47 +0200 Subject: [PATCH 2/5] ndb - bug#18781 (5.0) handle rolling upgrade, minor fixes, logging, docs ndb/src/kernel/blocks/dbdict/DictLock.txt: NR signals ndb/src/kernel/blocks/dbdict/Dbdict.cpp: call removeStaleDictLocks at right place, comment why it works more checks, better logging ndb/src/kernel/blocks/dbdict/Dbdict.hpp: call removeStaleDictLocks at right place, comment why it works more checks, better logging ndb/include/kernel/signaldata/DictLock.hpp: 2 more REFs ndb/include/ndb_version.h.in: DICT LOCK appeared in 5.0.23 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: DICT LOCK rolling upgrade from version < 5.0.23 ndb/src/kernel/blocks/ERROR_codes.txt: more DICT LOCK related testing ndb/test/ndbapi/testDict.cpp: more DICT LOCK related testing --- ndb/include/kernel/signaldata/DictLock.hpp | 4 +- ndb/include/ndb_version.h.in | 2 + ndb/src/kernel/blocks/ERROR_codes.txt | 6 +- ndb/src/kernel/blocks/dbdict/Dbdict.cpp | 101 +++++++++++++++++---- ndb/src/kernel/blocks/dbdict/Dbdict.hpp | 5 +- ndb/src/kernel/blocks/dbdict/DictLock.txt | 94 +++++++++++++++++++ ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 46 +++++++++- ndb/test/ndbapi/testDict.cpp | 67 +++++++++----- 8 files changed, 278 insertions(+), 47 deletions(-) create mode 100644 ndb/src/kernel/blocks/dbdict/DictLock.txt diff --git a/ndb/include/kernel/signaldata/DictLock.hpp b/ndb/include/kernel/signaldata/DictLock.hpp index c8f919f65a8..3e29d762962 100644 --- a/ndb/include/kernel/signaldata/DictLock.hpp +++ b/ndb/include/kernel/signaldata/DictLock.hpp @@ -55,7 +55,9 @@ public: enum ErrorCode { NotMaster = 1, InvalidLockType = 2, - TooManyRequests = 3 + BadUserRef = 3, + TooLate = 4, + TooManyRequests = 5 }; private: Uint32 userPtr; diff --git a/ndb/include/ndb_version.h.in b/ndb/include/ndb_version.h.in index 38b72306d03..7e878803f46 100644 --- a/ndb/include/ndb_version.h.in +++ b/ndb/include/ndb_version.h.in @@ -60,5 +60,7 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) +#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23) + #endif diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index a63c1bef915..ddb99cb6b56 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4013 Next DBLQH 5043 Next DBDICT 6007 -Next DBDIH 7175 +Next DBDIH 7177 Next DBTC 8037 Next CMVMI 9000 Next BACKUP 10022 @@ -312,7 +312,9 @@ Test Crashes in handling node restarts 7170: Crash when receiving START_PERMREF (InitialStartRequired) -7174: Send one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR) +7174: Crash starting node before sending DICT_LOCK_REQ +7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR) +7176: Slave NR pretends master does not support DICT lock (rolling upgrade) DICT: 6000 Crash during NR when receiving DICTSTARTREQ diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 73007bd9aad..3cdba251492 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -205,7 +205,7 @@ void Dbdict::execCONTINUEB(Signal* signal) case ZDICT_LOCK_POLL: jam(); - checkDictLockQueue(signal); + checkDictLockQueue(signal, true); break; default : @@ -2836,7 +2836,6 @@ void Dbdict::execNODE_FAILREP(Signal* signal) case BS_NODE_RESTART: jam(); ok = true; - removeStaleDictLocks(signal, theFailedNodes); break; } ndbrequire(ok); @@ -2860,6 +2859,15 @@ void Dbdict::execNODE_FAILREP(Signal* signal) }//if }//for + /* + * NODE_FAILREP guarantees that no "in flight" signal from + * a dead node is accepted, and also that the job buffer contains + * no such (un-executed) signals. Therefore no DICT_UNLOCK_ORD + * from a dead node (leading to master crash) is possible after + * this clean-up removes the lock record. + */ + removeStaleDictLocks(signal, theFailedNodes); + }//execNODE_FAILREP() @@ -12210,7 +12218,7 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask) const Dbdict::DictLockType* Dbdict::getDictLockType(Uint32 lockType) { - static DictLockType lt[] = { + static const DictLockType lt[] = { { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" } }; for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) { @@ -12220,12 +12228,40 @@ Dbdict::getDictLockType(Uint32 lockType) return NULL; } +void +Dbdict::sendDictLockInfoEvent(Uint32 pollCount) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + unsigned count = 0; + + char queue_buf[100]; + char *p = &queue_buf[0]; + const char *const q = &queue_buf[sizeof(queue_buf)]; + *p = 0; + + while (loopPtr.i != RNIL) { + jam(); + my_snprintf(p, q-p, "%s%u%s", + ++count == 1 ? "" : " ", + (unsigned)refToNode(loopPtr.p->req.userRef), + loopPtr.p->locked ? "L" : ""); + p += strlen(p); + c_dictLockQueue.next(loopPtr); + } + + infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s", + (int)c_blockState, + c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(), + c_dictLockPoll, (int)pollCount, queue_buf); +} + void Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text) { infoEvent("DICT: %s %u for %s", text, - (unsigned int)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); + (unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); } void @@ -12234,6 +12270,8 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) jamEntry(); const DictLockReq* req = (const DictLockReq*)&signal->theData[0]; + // make sure bad request crashes slave, not master (us) + if (getOwnNodeId() != c_masterNodeId) { jam(); sendDictLockRef(signal, *req, DictLockRef::NotMaster); @@ -12247,6 +12285,19 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) return; } + if (req->userRef != signal->getSendersBlockRef() || + getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::BadUserRef); + return; + } + + if (c_aliveNodes.get(refToNode(req->userRef))) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooLate); + return; + } + DictLockPtr lockPtr; if (! c_dictLockQueue.seize(lockPtr)) { jam(); @@ -12258,21 +12309,23 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) lockPtr.p->locked = false; lockPtr.p->lt = lt; - checkDictLockQueue(signal); + checkDictLockQueue(signal, false); if (! lockPtr.p->locked) sendDictLockInfoEvent(lockPtr, "lock request by node"); } void -Dbdict::checkDictLockQueue(Signal* signal) +Dbdict::checkDictLockQueue(Signal* signal, bool poll) { + Uint32 pollCount = ! poll ? 0 : signal->theData[1]; + DictLockPtr lockPtr; do { if (! c_dictLockQueue.first(lockPtr)) { jam(); - setDictLockPoll(signal, false); + setDictLockPoll(signal, false, pollCount); return; } @@ -12299,7 +12352,7 @@ Dbdict::checkDictLockQueue(Signal* signal) // this routine is called again when it is removed for any reason bool on = ! lockPtr.p->locked; - setDictLockPoll(signal, on); + setDictLockPoll(signal, on, pollCount); } void @@ -12326,7 +12379,7 @@ Dbdict::execDICT_UNLOCK_ORD(Signal* signal) c_dictLockQueue.release(lockPtr); - checkDictLockQueue(signal); + checkDictLockQueue(signal, false); } void @@ -12359,21 +12412,32 @@ Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode) // control polling void -Dbdict::setDictLockPoll(Signal* signal, bool on) +Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount) { if (on) { jam(); signal->theData[0] = ZDICT_LOCK_POLL; - sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1); + signal->theData[1] = pollCount + 1; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2); } - if (c_dictLockPoll != on) { + bool change = (c_dictLockPoll != on); + + if (change) { jam(); -#ifdef VM_TRACE - infoEvent("DICT: lock polling %s", on ? "On" : "Off"); -#endif c_dictLockPoll = on; } + + // avoid too many messages if master is stuck busy (BS_NODE_FAILURE) + bool periodic = + pollCount < 8 || + pollCount < 64 && pollCount % 8 == 0 || + pollCount < 512 && pollCount % 64 == 0 || + pollCount < 4096 && pollCount % 512 == 0 || + pollCount % 4096 == 0; // about every 6 minutes + + if (change || periodic) + sendDictLockInfoEvent(pollCount); } // NF handling @@ -12384,6 +12448,11 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) DictLockPtr loopPtr; c_dictLockQueue.first(loopPtr); + if (getOwnNodeId() != c_masterNodeId) { + ndbrequire(loopPtr.i == RNIL); + return; + } + while (loopPtr.i != RNIL) { jam(); DictLockPtr lockPtr = loopPtr; @@ -12409,7 +12478,7 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) } } - checkDictLockQueue(signal); + checkDictLockQueue(signal, false); } diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index fbad67d8822..9c0bf65b69c 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -1804,14 +1804,15 @@ private: bool c_dictLockPoll; static const DictLockType* getDictLockType(Uint32 lockType); + void sendDictLockInfoEvent(Uint32 pollCount); void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); - void checkDictLockQueue(Signal* signal); + void checkDictLockQueue(Signal* signal, bool poll); void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); // control polling i.e. continueB loop - void setDictLockPoll(Signal* signal, bool on); + void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount); // NF handling void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes); diff --git a/ndb/src/kernel/blocks/dbdict/DictLock.txt b/ndb/src/kernel/blocks/dbdict/DictLock.txt new file mode 100644 index 00000000000..17f24119e9d --- /dev/null +++ b/ndb/src/kernel/blocks/dbdict/DictLock.txt @@ -0,0 +1,94 @@ +Lock master DICT against schema operations + +Implementation +-------------- + +[ see comments in Dbdict.hpp ] + +Use case: Node startup INR / NR +------------------------------- + +Master DICT (like any block) keeps list of alive nodes (c_aliveNodes). +These are participants in schema ops. + +(1) c_aliveNodes is initialized when DICT starts + in sp3 in READ_NODESCONF from CNTR + +(2) when slave node fails (in any sp of the slave node) + it is removed from c_aliveNodes in NODE_FAILREP + +(3) when slave starts, it is added to c_aliveNodes + in sp4 of the starting node in INCL_NODEREQ + +Slave DIH locks master DICT in sp2 and releases the lock when started. +Based on the constraints: + +- the lock is taken when master DICT is known + DIH reads this in sp2 in READ_NODESCONF + +- the lock is taken before (3) + +- the lock is taken before copying starts and held until it is done + in sp4 DIH meta, DICT meta, tuple data + +- on INR in sp2 in START_PERMREQ the LCP info of the slave is erased + in all DIH in invalidateNodeLCP() - not safe under schema ops + +Signals: + +All but DICT_LOCK are standard v5.0 signals. +s=starting node, m=master, a=all participants, l=local block. + +* sp2 - DICT_LOCK and START_PERM + +DIH/s + DICT_LOCK_REQ + DICT/m + DICT_LOCK_CONF +DIH/s + START_PERMREQ + DIH/m + START_INFOREQ + DIH/a + invalidateNodeLCP() if INR + DIH/a + START_INFOCONF + DIH/m + START_PERMCONF +DIH/s + +* sp4 - START_ME (copy metadata, no changes) + +DIH/s + START_MEREQ + DIH/m + COPY_TABREQ + DIH/s + COPY_TABCONF + DIH/m + DICTSTARTREQ + DICT/s + GET_SCHEMA_INFOREQ + DICT/m + SCHEMA_INFO + DICT/s + DICTSTARTCONF + DIH/m + INCL_NODEREQ + DIH/a + INCL_NODEREQ + ANY/l + INCL_NODECONF + DIH/a + INCL_NODECONF + DIH/m + START_MECONF +DIH/s + +* sp7 - release DICT lock + +DIH/s + DICT_UNLOCK_ORD + DICT/m + +# vim: set et sw=4: diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index c37461a1f65..352053bef10 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -1594,6 +1594,9 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) */ ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + // check that we are not yet taking part in schema ops + CRASH_INSERTION(7174); + Uint32 lockType = DictLockReq::NodeRestartLock; Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 }; sendDictLockReq(signal, lockType, c); @@ -1746,7 +1749,7 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) ndbrequire(refToNode(retRef) == nodeId); if ((c_nodeStartMaster.activeState) || (c_nodeStartMaster.wait != ZFALSE) || - ERROR_INSERTED_CLEAR(7174)) { + ERROR_INSERTED_CLEAR(7175)) { jam(); signal->theData[0] = nodeId; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; @@ -14709,6 +14712,34 @@ Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c) lockPtr.p->locked = false; lockPtr.p->callback = c; + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + unsigned int get_minor = getMinor(masterVersion); + unsigned int get_build = getBuild(masterVersion); + + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + jam(); + + infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u", + (unsigned int)cmasterNodeId, get_major, get_minor, get_build); + + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + conf->userPtr = lockPtr.i; + conf->lockType = lockType; + conf->lockPtr = ZNIL; + + sendSignal(reference(), GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); + return; + } + } + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal, DictLockReq::SignalLength, JBB); @@ -14758,6 +14789,19 @@ Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI) c_dictLockSlavePool.release(lockPtr); + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + unsigned int get_major = getMajor(masterVersion); + ndbrequire(get_major == 4 || get_major == 5); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + ERROR_INSERTED(7176)) { + return; + } + } + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, DictUnlockOrd::SignalLength, JBB); diff --git a/ndb/test/ndbapi/testDict.cpp b/ndb/test/ndbapi/testDict.cpp index 2cfb78f143e..397f41b3d4e 100644 --- a/ndb/test/ndbapi/testDict.cpp +++ b/ndb/test/ndbapi/testDict.cpp @@ -1590,17 +1590,18 @@ recv_dict_ops_run(NDBT_Context* ctx) int runRestarts(NDBT_Context* ctx, NDBT_Step* step) { - static int err_master[] = { // non-crashing - 0, - 7174 // send one fake START_PERMREF + static int errlst_master[] = { // non-crashing + 7175, // send one fake START_PERMREF + 0 }; - static int err_node[] = { - 0, - 7121, // crash on START_PERMCONF - 7130 // crash on START_MECONF + static int errlst_node[] = { + 7174, // crash before sending DICT_LOCK_REQ + 7176, // pretend master does not support DICT lock + 7121, // crash at receive START_PERMCONF + 0 }; - const uint err_master_cnt = sizeof(err_master)/sizeof(err_master[0]); - const uint err_node_cnt = sizeof(err_node)/sizeof(err_node[0]); + const uint errcnt_master = sizeof(errlst_master)/sizeof(errlst_master[0]); + const uint errcnt_node = sizeof(errlst_node)/sizeof(errlst_node[0]); myRandom48Init(NdbTick_CurrentMillisecond()); NdbRestarter restarter; @@ -1632,7 +1633,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) nodeIdList[nodeIdCnt++] = nodeId; } - if (numnodes >= 4) { + if (numnodes >= 4 && myRandom48(2) == 0) { int rand = myRandom48(numnodes); int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand); CHECK(nodeId != -1); @@ -1642,6 +1643,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl; + const uint timeout = 60; //secs for node wait const unsigned maxsleep = 2000; //ms bool NF_ops = ctx->getProperty("Restart_NF_ops"); @@ -1655,9 +1657,8 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) NdbSleep_MilliSleep(myRandom48(maxsleep)); { - int i = 0; - while (i < nodeIdCnt) { - int nodeId = nodeIdList[i++]; + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; bool nostart = true; bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2); @@ -1676,9 +1677,31 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) } g_info << "1: wait for nostart" << endl; - CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt) == 0); + CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt, timeout) == 0); NdbSleep_MilliSleep(myRandom48(maxsleep)); + int err_master = 0; + int err_node[2] = { 0, 0 }; + + if (NR_error) { + err_master = errlst_master[l % errcnt_master]; + + // limitation: cannot have 2 node restarts and crash_insert + // one node may die for real (NF during startup) + + for (int i = 0; i < nodeIdCnt && nodeIdCnt == 1; i++) { + err_node[i] = errlst_node[l % errcnt_node]; + + // 7176 - no DICT lock protection + + if (err_node[i] == 7176) { + g_info << "1: no dict ops due to error insert " + << err_node[i] << endl; + NR_ops = false; + } + } + } + g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl; if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2)) break; @@ -1689,23 +1712,17 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) if (NR_error) { { - int rand = myRandom48(err_master_cnt); - int err = err_master[rand]; + int err = err_master; if (err != 0) { g_info << "1: insert master error " << err << endl; CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0); } } - // limitation: cannot have 2 node restarts and crash_insert - // one node may die for real (NF during startup) + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; - int i = 0; - while (i < nodeIdCnt && nodeIdCnt == 1) { - int nodeId = nodeIdList[i++]; - - int rand = myRandom48(err_node_cnt); - int err = err_node[rand]; + int err = err_node[i]; if (err != 0) { g_info << "1: insert node " << nodeId << " error " << err << endl; CHECK(restarter.insertErrorInNode(nodeId, err) == 0); @@ -1715,7 +1732,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step) NdbSleep_MilliSleep(myRandom48(maxsleep)); g_info << "1: wait cluster started" << endl; - CHECK(restarter.waitClusterStarted() == 0); + CHECK(restarter.waitClusterStarted(timeout) == 0); NdbSleep_MilliSleep(myRandom48(maxsleep)); g_info << "1: restart done" << endl; From e9452db1c1b7fb534a44590312d6608640675350 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Jun 2006 01:50:20 +0400 Subject: [PATCH 3/5] Fix for bug#19634 "Re-execution of multi-delete which involve trigger/stored function crashes server". Attempts to execute prepared multi-delete statement which involved trigger or stored function caused server crashes (the same happened for such statements included in stored procedures in cases when one tried to execute them more than once). The problem was caused by yet another incorrect usage of check_table_access() routine (the latter assumes that table list which it gets as argument corresponds to value LEX::query_tables_own_last). We solve this problem by juggling with LEX::query_tables_own_last value when we call check_table_access() for LEX::auxilliary_table_list (better solution is too intrusive and should be done in 5.1). mysql-test/r/sp-prelocking.result: Added test for bug#19634 "Re-execution of multi-delete which involve trigger/ stored function crashes server". mysql-test/t/sp-prelocking.test: Added test for bug#19634 "Re-execution of multi-delete which involve trigger/ stored function crashes server". sql/sql_parse.cc: To call safely check_table_access() for LEX::auxilliary_table_list we have to juggle with LEX::query_tables_own_last value. --- mysql-test/r/sp-prelocking.result | 18 ++++++++++++++ mysql-test/t/sp-prelocking.test | 31 ++++++++++++++++++++++++ sql/sql_parse.cc | 40 +++++++++++++++++++++++++++---- 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/mysql-test/r/sp-prelocking.result b/mysql-test/r/sp-prelocking.result index 2335513b28a..7d8dd862748 100644 --- a/mysql-test/r/sp-prelocking.result +++ b/mysql-test/r/sp-prelocking.result @@ -237,3 +237,21 @@ deallocate prepare stmt; drop table t1; drop view v1, v2, v3; drop function bug15683; +drop table if exists t1, t2, t3; +drop function if exists bug19634; +create table t1 (id int, data int); +create table t2 (id int); +create table t3 (data int); +create function bug19634() returns int return (select count(*) from t3); +prepare stmt from "delete t1 from t1, t2 where t1.id = t2.id and bug19634()"; +execute stmt; +execute stmt; +deallocate prepare stmt; +create trigger t1_bi before delete on t1 for each row insert into t3 values (old.data); +prepare stmt from "delete t1 from t1, t2 where t1.id = t2.id"; +execute stmt; +execute stmt; +deallocate prepare stmt; +drop function bug19634; +drop table t1, t2, t3; +End of 5.0 tests diff --git a/mysql-test/t/sp-prelocking.test b/mysql-test/t/sp-prelocking.test index a7215462afb..b94de6236d3 100644 --- a/mysql-test/t/sp-prelocking.test +++ b/mysql-test/t/sp-prelocking.test @@ -272,3 +272,34 @@ drop table t1; drop view v1, v2, v3; drop function bug15683; + +# +# Bug#19634 "Re-execution of multi-delete which involve trigger/stored +# function crashes server" +# +--disable_warnings +drop table if exists t1, t2, t3; +drop function if exists bug19634; +--enable_warnings +create table t1 (id int, data int); +create table t2 (id int); +create table t3 (data int); +create function bug19634() returns int return (select count(*) from t3); +prepare stmt from "delete t1 from t1, t2 where t1.id = t2.id and bug19634()"; +# This should not crash server +execute stmt; +execute stmt; +deallocate prepare stmt; + +create trigger t1_bi before delete on t1 for each row insert into t3 values (old.data); +prepare stmt from "delete t1 from t1, t2 where t1.id = t2.id"; + +execute stmt; +execute stmt; +deallocate prepare stmt; + +drop function bug19634; +drop table t1, t2, t3; + + +--echo End of 5.0 tests diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index ba5c2ebf484..7ed96250240 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5202,8 +5202,26 @@ bool check_global_access(THD *thd, ulong want_access) /* - Check the privilege for all used tables. Table privileges are cached - in the table list for GRANT checking + Check the privilege for all used tables. + + SYNOPSYS + check_table_access() + thd Thread context + want_access Privileges requested + tables List of tables to be checked + no_errors FALSE/TRUE - report/don't report error to + the client (using my_error() call). + + NOTES + Table privileges are cached in the table list for GRANT checking. + This functions assumes that table list used and + thd->lex->query_tables_own_last value correspond to each other + (the latter should be either 0 or point to next_global member + of one of elements of this table list). + + RETURN VALUE + FALSE - OK + TRUE - Access denied */ bool @@ -7068,14 +7086,28 @@ bool multi_delete_precheck(THD *thd, TABLE_LIST *tables) SELECT_LEX *select_lex= &thd->lex->select_lex; TABLE_LIST *aux_tables= (TABLE_LIST *)thd->lex->auxilliary_table_list.first; + TABLE_LIST **save_query_tables_own_last= thd->lex->query_tables_own_last; DBUG_ENTER("multi_delete_precheck"); /* sql_yacc guarantees that tables and aux_tables are not zero */ DBUG_ASSERT(aux_tables != 0); if (check_db_used(thd, tables) || check_db_used(thd,aux_tables) || - check_table_access(thd,SELECT_ACL, tables,0) || - check_table_access(thd,DELETE_ACL, aux_tables,0)) + check_table_access(thd, SELECT_ACL, tables, 0)) DBUG_RETURN(TRUE); + + /* + Since aux_tables list is not part of LEX::query_tables list we + have to juggle with LEX::query_tables_own_last value to be able + call check_table_access() safely. + */ + thd->lex->query_tables_own_last= 0; + if (check_table_access(thd, DELETE_ACL, aux_tables, 0)) + { + thd->lex->query_tables_own_last= save_query_tables_own_last; + DBUG_RETURN(TRUE); + } + thd->lex->query_tables_own_last= save_query_tables_own_last; + if ((thd->options & OPTION_SAFE_UPDATES) && !select_lex->where) { my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, From 15ac64063197f00a7343fb99613554788cca10b0 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 22 Jun 2006 19:15:03 +0400 Subject: [PATCH 4/5] Bug#15811: extremely long time for mysql client to execute long INSERT The problem was in redundant calls to strlen() in string functions, where we may then return after checking only the small number of characters. No test case is provided since it's a performance fix. strings/ctype-mb.c: Do not use strlen() where arbitrary horizon of at least CHARSET_INFO::mbmaxlen character is sufficient. --- strings/ctype-mb.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index a3e10ba7650..0d73c7d1e51 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -24,12 +24,12 @@ void my_caseup_str_mb(CHARSET_INFO * cs, char *str) { register uint32 l; - register char *end=str+strlen(str); /* BAR TODO: remove strlen() call */ register uchar *map=cs->to_upper; while (*str) { - if ((l=my_ismbchar(cs, str,end))) + /* Pointing after the '\0' is safe here. */ + if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen))) str+=l; else { @@ -42,12 +42,12 @@ void my_caseup_str_mb(CHARSET_INFO * cs, char *str) void my_casedn_str_mb(CHARSET_INFO * cs, char *str) { register uint32 l; - register char *end=str+strlen(str); register uchar *map=cs->to_lower; while (*str) { - if ((l=my_ismbchar(cs, str,end))) + /* Pointing after the '\0' is safe here. */ + if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen))) str+=l; else { @@ -101,15 +101,18 @@ uint my_casedn_mb(CHARSET_INFO * cs, char *src, uint srclen, return srclen; } +/* + my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise. + */ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t) { register uint32 l; - register const char *end=s+strlen(s); register uchar *map=cs->to_upper; - while (smbmaxlen))) { while (l--) if (*s++ != *t++) @@ -120,7 +123,8 @@ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t) else if (map[(uchar) *s++] != map[(uchar) *t++]) return 1; } - return *t; + /* At least one of '*s' and '*t' is zero here. */ + return (*t != *s); } From 67fd3c4a53f585f8e33b5094822cf639a27483de Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 22 Jun 2006 19:29:48 +0400 Subject: [PATCH 5/5] A fix and a test case for Bug#15217 "Using a SP cursor on a table created with PREPARE fails with weird error". More generally, re-executing a stored procedure with a complex SP cursor query could lead to a crash. The cause of the problem was that SP cursor queries were not optimized properly at first execution: their parse tree belongs to sp_instr_cpush, not sp_instr_copen, and thus the tree was tagged "EXECUTED" when the cursor was declared, not when it was opened. This led to loss of optimization transformations performed at first execution, as sp_instr_copen saw that the query is already "EXECUTED" and therefore either not ran first-execution related blocks or wrongly rolled back the transformations caused by first-execution code. The fix is to update the state of the parsed tree only when the tree is executed, as opposed to when the instruction containing the tree is executed. Assignment if i->state is moved to reset_lex_and_exec_core. mysql-test/r/sp.result: Test results fixed (Bug#15217) mysql-test/t/sp.test: Add a test case for Bug#15217 sql/sp_head.cc: Move assignment of stmt_arena->state to reset_lex_and_exec_core --- mysql-test/r/sp.result | 21 +++++++++++++++++++++ mysql-test/t/sp.test | 27 +++++++++++++++++++++++++++ sql/sp_head.cc | 4 +++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/sp.result b/mysql-test/r/sp.result index ff378f1f43b..d3874c769fa 100644 --- a/mysql-test/r/sp.result +++ b/mysql-test/r/sp.result @@ -4990,4 +4990,25 @@ CALL bug18037_p2()| DROP FUNCTION bug18037_f1| DROP PROCEDURE bug18037_p1| DROP PROCEDURE bug18037_p2| +drop table if exists t3| +drop procedure if exists bug15217| +create table t3 as select 1| +create procedure bug15217() +begin +declare var1 char(255); +declare cur1 cursor for select * from t3; +open cur1; +fetch cur1 into var1; +select concat('data was: /', var1, '/'); +close cur1; +end | +call bug15217()| +concat('data was: /', var1, '/') +data was: /1/ +flush tables | +call bug15217()| +concat('data was: /', var1, '/') +data was: /1/ +drop table t3| +drop procedure bug15217| drop table t1,t2; diff --git a/mysql-test/t/sp.test b/mysql-test/t/sp.test index 1d21a5da187..66498198157 100644 --- a/mysql-test/t/sp.test +++ b/mysql-test/t/sp.test @@ -5888,6 +5888,33 @@ DROP FUNCTION bug18037_f1| DROP PROCEDURE bug18037_p1| DROP PROCEDURE bug18037_p2| +# +# Bug#15217 "Using a SP cursor on a table created with PREPARE fails with +# weird error". Check that the code that is supposed to work at +# the first execution of a stored procedure actually works for +# sp_instr_copen. + +--disable_warnings +drop table if exists t3| +drop procedure if exists bug15217| +--enable_warnings +create table t3 as select 1| +create procedure bug15217() +begin + declare var1 char(255); + declare cur1 cursor for select * from t3; + open cur1; + fetch cur1 into var1; + select concat('data was: /', var1, '/'); + close cur1; +end | +# Returns expected result +call bug15217()| +flush tables | +# Returns error with garbage as column name +call bug15217()| +drop table t3| +drop procedure bug15217| # # BUG#NNNN: New bug synopsis diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 3b29a841966..ef2f895c8b2 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -1075,7 +1075,6 @@ sp_head::execute(THD *thd) thd->net.no_send_error= 0; if (i->free_list) cleanup_items(i->free_list); - i->state= Query_arena::EXECUTED; /* If we've set thd->user_var_events_alloc to mem_root of this SP @@ -2210,6 +2209,9 @@ sp_lex_keeper::reset_lex_and_exec_core(THD *thd, uint *nextp, m_lex->mark_as_requiring_prelocking(NULL); } thd->rollback_item_tree_changes(); + /* Update the state of the active arena. */ + thd->stmt_arena->state= Query_arena::EXECUTED; + /* Unlike for PS we should not call Item's destructors for newly created