diff --git a/sql/handler.cc b/sql/handler.cc index 3331e014001..a98618172be 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -3263,37 +3263,39 @@ namespace int write_locked_table_maps(THD *thd) { DBUG_ENTER("write_locked_table_maps"); - DBUG_PRINT("enter", ("thd=%p, thd->lock=%p, thd->locked_tables=%p", - thd, thd->lock, thd->locked_tables)); + DBUG_PRINT("enter", ("thd=%p, thd->lock=%p, thd->locked_tables=%p, thd->extra_lock=%p", + thd, thd->lock, thd->locked_tables, thd->extra_lock)); if (thd->get_binlog_table_maps() == 0) { - /* - Exactly one table has to be locked, otherwise this code is not - guaranteed to work. - */ - DBUG_ASSERT((thd->lock != NULL) + (thd->locked_tables != NULL) == 1); - - MYSQL_LOCK *lock= thd->lock ? thd->lock : thd->locked_tables; - DBUG_ASSERT(lock->table_count > 0); - TABLE **const end_ptr= lock->table + lock->table_count; - for (TABLE **table_ptr= lock->table ; - table_ptr != end_ptr ; - ++table_ptr) + MYSQL_LOCK *const locks[] = { /* lock sets scanned for write-locked tables; NULL entries are skipped below */ + thd->extra_lock, thd->lock, thd->locked_tables + }; + for (size_t i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i ) /* unsigned index, same signedness as the sizeof bound */ { - TABLE *const table= *table_ptr; - DBUG_PRINT("info", ("Checking table %s", table->s->table_name)); - if (table->current_lock == F_WRLCK && - check_table_binlog_row_based(thd, table)) + MYSQL_LOCK const *const lock= locks[i]; + if (lock == NULL) + continue; + + TABLE **const end_ptr= lock->table + lock->table_count; + for (TABLE **table_ptr= lock->table ; + table_ptr != end_ptr ; + ++table_ptr) { - int const has_trans= table->file->has_transactions(); - int const error= thd->binlog_write_table_map(table, has_trans); - /* - If an error occurs, it is the responsibility of the caller to - roll back the transaction. 
- */ - if (unlikely(error)) - DBUG_RETURN(1); + TABLE *const table= *table_ptr; + DBUG_PRINT("info", ("Checking table %s", table->s->table_name)); + if (table->current_lock == F_WRLCK && + check_table_binlog_row_based(thd, table)) + { + int const has_trans= table->file->has_transactions(); + int const error= thd->binlog_write_table_map(table, has_trans); + /* + If an error occurs, it is the responsibility of the caller to + roll back the transaction. + */ + if (unlikely(error)) + DBUG_RETURN(1); + } } } } diff --git a/sql/sql_class.h b/sql/sql_class.h index 2d42196c5e1..b8491892330 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -692,6 +692,14 @@ public: THD::prelocked_mode for more info.) */ MYSQL_LOCK *locked_tables; + + /* + CREATE-SELECT keeps an extra lock for the table being + created. This field is used to keep the extra lock available for + lower level routines, which would otherwise miss that lock. + */ + MYSQL_LOCK *extra_lock; + /* prelocked_mode_type enum and prelocked_mode member are used for indicating whenever "prelocked mode" is on, and what type of @@ -744,7 +752,7 @@ public: void reset_open_tables_state() { open_tables= temporary_tables= handler_tables= derived_tables= 0; - lock= locked_tables= 0; + extra_lock= lock= locked_tables= 0; prelocked_mode= NON_PRELOCKED; state_flags= 0U; } @@ -1595,9 +1603,6 @@ class select_insert :public select_result_interceptor { bool send_eof(); /* not implemented: select_insert is never re-used in prepared statements */ void cleanup(); - -protected: - MYSQL_LOCK *lock; }; diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index f1616e12d42..7b017ad7317 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -2188,7 +2188,6 @@ select_insert::select_insert(TABLE_LIST *table_list_par, TABLE *table_par, bool ignore_check_option_errors) :table_list(table_list_par), table(table_par), fields(fields_par), last_insert_id(0), - lock(0), insert_into_view(table_list_par && table_list_par->view != 0) { bzero((char*) 
&info,sizeof(info)); @@ -2356,6 +2355,7 @@ bool select_insert::send_data(List &values) { DBUG_ENTER("select_insert::send_data"); bool error=0; + if (unit->offset_limit_cnt) { // using limit offset,count unit->offset_limit_cnt--; @@ -2377,34 +2377,8 @@ bool select_insert::send_data(List &values) } } - /* - The thd->lock lock contain the locks for the select part of the - statement and the 'lock' variable contain the write lock for the - currently locked table that is being created or inserted - into. However, the row-based replication will investigate the - thd->lock to decide what table maps are to be written, so this one - has to contain the tables locked for writing. To be able to write - table map for the table being created, we temporarily set - THD::lock to select_insert::lock while writing the record to the - storage engine. We cannot set this elsewhere, since the execution - of a stored function inside the select expression might cause the - lock structures to be NULL. - */ - - { - MYSQL_LOCK *saved_lock= NULL; - if (lock) - { - saved_lock= thd->lock; - thd->lock= lock; - } - - error= write_record(thd, table, &info); + error= write_record(thd, table, &info); - if (lock) - thd->lock= saved_lock; - } - if (!error) { if (table->triggers || info.handle_duplicates == DUP_UPDATE) @@ -2776,8 +2750,8 @@ select_create::prepare(List &values, SELECT_LEX_UNIT *u) unit= u; if (!(table= create_table_from_items(thd, create_info, create_table, - extra_fields, keys, &values, &lock, - hook_ptr))) + extra_fields, keys, &values, + &thd->extra_lock, hook_ptr))) DBUG_RETURN(-1); // abort() deletes table if (table->s->fields < values.elements) @@ -2884,13 +2858,13 @@ bool select_create::send_eof() { table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); VOID(pthread_mutex_lock(&LOCK_open)); - mysql_unlock_tables(thd, lock); + mysql_unlock_tables(thd, thd->extra_lock); if (!table->s->tmp_table) { if (close_thread_table(thd, &table)) VOID(pthread_cond_broadcast(&COND_refresh)); } - lock=0; 
+ thd->extra_lock=0; table=0; VOID(pthread_mutex_unlock(&LOCK_open)); } @@ -2900,10 +2874,10 @@ bool select_create::send_eof() void select_create::abort() { VOID(pthread_mutex_lock(&LOCK_open)); - if (lock) + if (thd->extra_lock) { - mysql_unlock_tables(thd, lock); - lock=0; + mysql_unlock_tables(thd, thd->extra_lock); + thd->extra_lock=0; } if (table) { diff --git a/storage/ndb/include/kernel/GlobalSignalNumbers.h b/storage/ndb/include/kernel/GlobalSignalNumbers.h index f2808e9b15c..950b2629d9e 100644 --- a/storage/ndb/include/kernel/GlobalSignalNumbers.h +++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h @@ -517,16 +517,12 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_TEST_ORD 407 #define GSN_TESTSIG 408 #define GSN_TIME_SIGNAL 409 -/* 410 unused */ -/* 411 unused */ -/* 412 unused */ #define GSN_TUP_ABORTREQ 414 #define GSN_TUP_ADD_ATTCONF 415 #define GSN_TUP_ADD_ATTRREF 416 #define GSN_TUP_ADD_ATTRREQ 417 #define GSN_TUP_ATTRINFO 418 #define GSN_TUP_COMMITREQ 419 -/* 420 unused */ /* 421 unused */ /* 422 unused */ @@ -981,4 +977,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_DICT_ABORT_REF 668 #define GSN_DICT_ABORT_CONF 669 +/* DICT LOCK signals */ +#define GSN_DICT_LOCK_REQ 410 +#define GSN_DICT_LOCK_CONF 411 +#define GSN_DICT_LOCK_REF 412 +#define GSN_DICT_UNLOCK_ORD 420 + #endif diff --git a/storage/ndb/include/kernel/signaldata/AlterTable.hpp b/storage/ndb/include/kernel/signaldata/AlterTable.hpp index 1cdc7c00fcb..afd90541c9e 100644 --- a/storage/ndb/include/kernel/signaldata/AlterTable.hpp +++ b/storage/ndb/include/kernel/signaldata/AlterTable.hpp @@ -196,6 +196,7 @@ public: InvalidTableVersion = 241, DropInProgress = 283, Busy = 701, + BusyWithNR = 711, NotMaster = 702, InvalidFormat = 703, AttributeNameTooLong = 704, diff --git a/storage/ndb/include/kernel/signaldata/CreateTable.hpp b/storage/ndb/include/kernel/signaldata/CreateTable.hpp index b43a5e76eaf..e5e78bddd49 100644 --- 
a/storage/ndb/include/kernel/signaldata/CreateTable.hpp +++ b/storage/ndb/include/kernel/signaldata/CreateTable.hpp @@ -77,6 +77,7 @@ public: enum ErrorCode { NoError = 0, Busy = 701, + BusyWithNR = 711, NotMaster = 702, InvalidFormat = 703, AttributeNameTooLong = 704, diff --git a/storage/ndb/include/kernel/signaldata/DictLock.hpp b/storage/ndb/include/kernel/signaldata/DictLock.hpp new file mode 100644 index 00000000000..3e29d762962 --- /dev/null +++ b/storage/ndb/include/kernel/signaldata/DictLock.hpp @@ -0,0 +1,78 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef DICT_LOCK_HPP +#define DICT_LOCK_HPP + +#include "SignalData.hpp" + +// see comments in Dbdict.hpp + +class DictLockReq { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 3 ); + enum LockType { + NoLock = 0, + NodeRestartLock = 1 + }; +private: + Uint32 userPtr; + Uint32 lockType; + Uint32 userRef; +}; + +class DictLockConf { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 3 ); +private: + Uint32 userPtr; + Uint32 lockType; + Uint32 lockPtr; +}; + +class DictLockRef { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 3 ); + enum ErrorCode { + NotMaster = 1, + InvalidLockType = 2, + BadUserRef = 3, + TooLate = 4, + TooManyRequests = 5 + }; +private: + Uint32 userPtr; + Uint32 lockType; + Uint32 errorCode; +}; + +class DictUnlockOrd { + friend class Dbdict; + friend class Dbdih; +public: + STATIC_CONST( SignalLength = 2 ); +private: + Uint32 lockPtr; + Uint32 lockType; +}; + +#endif diff --git a/storage/ndb/include/kernel/signaldata/DropTable.hpp b/storage/ndb/include/kernel/signaldata/DropTable.hpp index cae6aff8754..e762446d2b8 100644 --- a/storage/ndb/include/kernel/signaldata/DropTable.hpp +++ b/storage/ndb/include/kernel/signaldata/DropTable.hpp @@ -53,6 +53,7 @@ public: enum ErrorCode { Busy = 701, + BusyWithNR = 711, NotMaster = 702, NoSuchTable = 709, InvalidTableVersion = 241, diff --git a/storage/ndb/include/ndb_version.h.in b/storage/ndb/include/ndb_version.h.in index 5b67796a019..ef35fa4093b 100644 --- a/storage/ndb/include/ndb_version.h.in +++ b/storage/ndb/include/ndb_version.h.in @@ -64,5 +64,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define 
NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) #define NDBD_FRAGID_VERSION (MAKE_VERSION(5,1,6)) +#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23) +#define NDBD_DICT_LOCK_VERSION_5_1 MAKE_VERSION(5,1,12) + #endif diff --git a/storage/ndb/src/common/debugger/SignalLoggerManager.cpp b/storage/ndb/src/common/debugger/SignalLoggerManager.cpp index d8710d2058f..67e13dc805a 100644 --- a/storage/ndb/src/common/debugger/SignalLoggerManager.cpp +++ b/storage/ndb/src/common/debugger/SignalLoggerManager.cpp @@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode, const char * params) } else { for (int i = 0; i < count; ++i){ BlockNumber number = getBlockNo(blocks[i]); - cnt += log(SLM_ON, number-MIN_BLOCK_NO, logMode); + cnt += log(SLM_ON, number, logMode); } } for(int i = 0; ireq.userRef), + loopPtr.p->locked ? "L" : ""); + p += strlen(p); + c_dictLockQueue.next(loopPtr); + } + + infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s", + (int)c_blockState, + c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(), + c_dictLockPoll, (int)pollCount, queue_buf); +} + +void +Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text) +{ + infoEvent("DICT: %s %u for %s", + text, + (unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text); +} + +void +Dbdict::execDICT_LOCK_REQ(Signal* signal) +{ + jamEntry(); + const DictLockReq* req = (const DictLockReq*)&signal->theData[0]; + + // make sure bad request crashes slave, not master (us) + + if (getOwnNodeId() != c_masterNodeId) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::NotMaster); + return; + } + + const DictLockType* lt = getDictLockType(req->lockType); + if (lt == NULL) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::InvalidLockType); + return; + } + + if (req->userRef != signal->getSendersBlockRef() || + getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::BadUserRef); + return; + } + + if 
(c_aliveNodes.get(refToNode(req->userRef))) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooLate); + return; + } + + DictLockPtr lockPtr; + if (! c_dictLockQueue.seize(lockPtr)) { + jam(); + sendDictLockRef(signal, *req, DictLockRef::TooManyRequests); + return; + } + + lockPtr.p->req = *req; + lockPtr.p->locked = false; + lockPtr.p->lt = lt; + + checkDictLockQueue(signal, false); + + if (! lockPtr.p->locked) + sendDictLockInfoEvent(lockPtr, "lock request by node"); +} + +void +Dbdict::checkDictLockQueue(Signal* signal, bool poll) +{ + Uint32 pollCount = ! poll ? 0 : signal->theData[1]; + + DictLockPtr lockPtr; + + do { + if (! c_dictLockQueue.first(lockPtr)) { + jam(); + setDictLockPoll(signal, false, pollCount); + return; + } + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + break; + } + + if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) { + jam(); + break; + } + + ndbrequire(c_blockState == BS_IDLE); + lockPtr.p->locked = true; + c_blockState = lockPtr.p->lt->blockState; + sendDictLockConf(signal, lockPtr); + + sendDictLockInfoEvent(lockPtr, "locked by node"); + } while (0); + + // poll while first request is open + // this routine is called again when it is removed for any reason + + bool on = ! lockPtr.p->locked; + setDictLockPoll(signal, on, pollCount); +} + +void +Dbdict::execDICT_UNLOCK_ORD(Signal* signal) +{ + jamEntry(); + const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0]; + + DictLockPtr lockPtr; + c_dictLockQueue.getPtr(lockPtr, ord->lockPtr); + ndbrequire(lockPtr.p->lt->lockType == ord->lockType); + + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! 
c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + sendDictLockInfoEvent(lockPtr, "unlocked by node"); + } else { + sendDictLockInfoEvent(lockPtr, "lock request removed by node"); + } + + c_dictLockQueue.release(lockPtr); + + checkDictLockQueue(signal, false); +} + +void +Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr) +{ + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + const DictLockReq& req = lockPtr.p->req; + + conf->userPtr = req.userPtr; + conf->lockType = req.lockType; + conf->lockPtr = lockPtr.i; + + sendSignal(req.userRef, GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); +} + +void +Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode) +{ + DictLockRef* ref = (DictLockRef*)&signal->theData[0]; + + ref->userPtr = req.userPtr; + ref->lockType = req.lockType; + ref->errorCode = errorCode; + + sendSignal(req.userRef, GSN_DICT_LOCK_REF, signal, + DictLockRef::SignalLength, JBB); +} + +// control polling + +void +Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount) +{ + if (on) { + jam(); + signal->theData[0] = ZDICT_LOCK_POLL; + signal->theData[1] = pollCount + 1; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2); + } + + bool change = (c_dictLockPoll != on); + + if (change) { + jam(); + c_dictLockPoll = on; + } + + // avoid too many messages if master is stuck busy (BS_NODE_FAILURE) + bool periodic = + pollCount < 8 || + pollCount < 64 && pollCount % 8 == 0 || + pollCount < 512 && pollCount % 64 == 0 || + pollCount < 4096 && pollCount % 512 == 0 || + pollCount % 4096 == 0; // about every 6 minutes + + if (change || periodic) + sendDictLockInfoEvent(pollCount); +} + +// NF handling + +void +Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) +{ + DictLockPtr loopPtr; + c_dictLockQueue.first(loopPtr); + + if (getOwnNodeId() != c_masterNodeId) { + ndbrequire(loopPtr.i == RNIL); + return; + } + + while (loopPtr.i != RNIL) { + jam(); + 
DictLockPtr lockPtr = loopPtr; + c_dictLockQueue.next(loopPtr); + + Uint32 nodeId = refToNode(lockPtr.p->req.userRef); + + if (NodeBitmask::get(theFailedNodes, nodeId)) { + if (lockPtr.p->locked) { + jam(); + ndbrequire(c_blockState == lockPtr.p->lt->blockState); + ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); + + c_blockState = BS_IDLE; + + sendDictLockInfoEvent(lockPtr, "remove lock by failed node"); + } else { + sendDictLockInfoEvent(lockPtr, "remove lock request by failed node"); + } + + c_dictLockQueue.release(lockPtr); + } + } + + checkDictLockQueue(signal, false); +} + + /* **************************************************************** */ /* ---------------------------------------------------------------- */ /* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */ diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index c770b6c69b8..58656023e4e 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -51,6 +51,7 @@ #include #include #include +#include #include "SchemaFile.hpp" #include #include @@ -68,6 +69,7 @@ /*--------------------------------------------------------------*/ #define ZPACK_TABLE_INTO_PAGES 0 #define ZSEND_GET_TAB_RESPONSE 3 +#define ZDICT_LOCK_POLL 4 /*--------------------------------------------------------------*/ @@ -812,6 +814,9 @@ private: void execDROP_FILEGROUP_REF(Signal* signal); void execDROP_FILEGROUP_CONF(Signal* signal); + void execDICT_LOCK_REQ(Signal* signal); + void execDICT_UNLOCK_ORD(Signal* signal); + /* * 2.4 COMMON STORED VARIABLES */ @@ -1046,12 +1051,44 @@ private: // State variables /* ----------------------------------------------------------------------- */ +#ifndef ndb_dbdict_log_block_state enum BlockState { BS_IDLE = 0, BS_CREATE_TAB = 1, BS_BUSY = 2, - BS_NODE_FAILURE = 3 + BS_NODE_FAILURE = 3, + BS_NODE_RESTART 
= 4 }; +#else // quick hack to log changes + enum { + BS_IDLE = 0, + BS_CREATE_TAB = 1, + BS_BUSY = 2, + BS_NODE_FAILURE = 3, + BS_NODE_RESTART = 4 + }; + struct BlockState; + friend struct BlockState; + struct BlockState { + BlockState() : + m_value(BS_IDLE) { + } + BlockState(int value) : + m_value(value) { + } + operator int() const { + return m_value; + } + BlockState& operator=(const BlockState& bs) { + Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT); + dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value); + globalSignalLoggers.log(DBDICT, "bs %d->%d", m_value, bs.m_value); + m_value = bs.m_value; + return *this; + } + int m_value; + }; +#endif BlockState c_blockState; struct PackTable { @@ -2015,6 +2052,65 @@ private: // Unique key for operation XXX move to some system table Uint32 c_opRecordSequence; + /* + * Master DICT can be locked in 2 mutually exclusive ways: + * + * 1) for schema ops, via operation records + * 2) against schema ops, via a lock queue + * + * Current use of 2) is by a starting node, to prevent schema ops + * until started. The ops are refused (BlockState != BS_IDLE), + * not queued. + * + * Master failure is not handled, in node start case the starting + * node will crash too anyway. Use lock table in future.. + * + * The lock queue is "serial" but other behaviour is possible + * by checking lock types e.g. to allow parallel node starts. + * + * Checking release of last op record is not convenient with + * current structure (5.0). Instead we poll via continueB. 
+ * + * XXX only table ops check BlockState + */ + + struct DictLockType { /* ties a DictLockReq lock type to the BlockState set while the lock is held and a label used in info events */ + DictLockReq::LockType lockType; + BlockState blockState; + const char* text; + }; + + struct DictLockRecord { /* one entry in the lock queue: the request as received, its lock type entry, and whether it has been granted */ + DictLockReq req; + const DictLockType* lt; + bool locked; + union { + Uint32 nextPool; + Uint32 nextList; + }; + Uint32 prevList; + }; + + typedef Ptr<DictLockRecord> DictLockPtr; + ArrayPool<DictLockRecord> c_dictLockPool; + DLFifoList<DictLockRecord> c_dictLockQueue; + bool c_dictLockPoll; + + static const DictLockType* getDictLockType(Uint32 lockType); + void sendDictLockInfoEvent(Uint32 pollCount); + void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); + + void checkDictLockQueue(Signal* signal, bool poll); + void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); + void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); + + // control polling i.e. continueB loop + void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount); + + // NF handling + void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes); + + // Statement blocks /* ------------------------------------------------------------ */ diff --git a/storage/ndb/src/kernel/blocks/dbdict/DictLock.txt b/storage/ndb/src/kernel/blocks/dbdict/DictLock.txt new file mode 100644 index 00000000000..17f24119e9d --- /dev/null +++ b/storage/ndb/src/kernel/blocks/dbdict/DictLock.txt @@ -0,0 +1,94 @@ +Lock master DICT against schema operations + +Implementation +-------------- + +[ see comments in Dbdict.hpp ] + +Use case: Node startup INR / NR +------------------------------- + +Master DICT (like any block) keeps list of alive nodes (c_aliveNodes). +These are participants in schema ops. 
+ +(1) c_aliveNodes is initialized when DICT starts + in sp3 in READ_NODESCONF from CNTR + +(2) when slave node fails (in any sp of the slave node) + it is removed from c_aliveNodes in NODE_FAILREP + +(3) when slave starts, it is added to c_aliveNodes + in sp4 of the starting node in INCL_NODEREQ + +Slave DIH locks master DICT in sp2 and releases the lock when started. +Based on the constraints: + +- the lock is taken when master DICT is known + DIH reads this in sp2 in READ_NODESCONF + +- the lock is taken before (3) + +- the lock is taken before copying starts and held until it is done + in sp4 DIH meta, DICT meta, tuple data + +- on INR in sp2 in START_PERMREQ the LCP info of the slave is erased + in all DIH in invalidateNodeLCP() - not safe under schema ops + +Signals: + +All but DICT_LOCK are standard v5.0 signals. +s=starting node, m=master, a=all participants, l=local block. + +* sp2 - DICT_LOCK and START_PERM + +DIH/s + DICT_LOCK_REQ + DICT/m + DICT_LOCK_CONF +DIH/s + START_PERMREQ + DIH/m + START_INFOREQ + DIH/a + invalidateNodeLCP() if INR + DIH/a + START_INFOCONF + DIH/m + START_PERMCONF +DIH/s + +* sp4 - START_ME (copy metadata, no changes) + +DIH/s + START_MEREQ + DIH/m + COPY_TABREQ + DIH/s + COPY_TABCONF + DIH/m + DICTSTARTREQ + DICT/s + GET_SCHEMA_INFOREQ + DICT/m + SCHEMA_INFO + DICT/s + DICTSTARTCONF + DIH/m + INCL_NODEREQ + DIH/a + INCL_NODEREQ + ANY/l + INCL_NODECONF + DIH/a + INCL_NODECONF + DIH/m + START_MECONF +DIH/s + +* sp7 - release DICT lock + +DIH/s + DICT_UNLOCK_ORD + DICT/m + +# vim: set et sw=4: diff --git a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index f98df82ea7d..a8dad87d81c 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -718,6 +718,9 @@ private: void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr); void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId); + void execDICT_LOCK_CONF(Signal* 
signal); + void execDICT_LOCK_REF(Signal* signal); + // Statement blocks //------------------------------------ // Methods that send signals @@ -935,6 +938,7 @@ private: void initialStartCompletedLab(Signal *); void allNodesLcpCompletedLab(Signal *); void nodeRestartPh2Lab(Signal *); + void nodeRestartPh2Lab2(Signal *); void initGciFilesLab(Signal *); void dictStartConfLab(Signal *); void nodeDictStartConfLab(Signal *); @@ -1603,6 +1607,30 @@ private: void startInfoReply(Signal *, Uint32 nodeId); void dump_replica_info(); + + /* + * Lock master DICT. Only current use is by starting node + * during NR. A pool of slave records is convenient anyway. + */ + struct DictLockSlaveRecord { + Uint32 lockPtr; + Uint32 lockType; + bool locked; + Callback callback; + Uint32 nextPool; + }; + + typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr; + ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool; + + // slave + void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c); + void recvDictLockConf(Signal* signal); + void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI); + + // NR + Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR + void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret); }; #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32) diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index a6ec3749606..ec4ac3c812a 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -53,6 +53,9 @@ void Dbdih::initData() waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE); waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE); + c_dictLockSlavePool.setSize(1); // assert single usage + c_dictLockSlavePtrI_nodeRestart = RNIL; + cgcpOrderBlocked = 0; c_lcpState.ctcCounter = 0; cwaitLcpSr = false; @@ -251,6 +254,9 @@ Dbdih::Dbdih(Block_context& ctx): addRecSignal(GSN_CREATE_FRAGMENTATION_REQ, &Dbdih::execCREATE_FRAGMENTATION_REQ); + 
addRecSignal(GSN_DICT_LOCK_CONF, 
&Dbdih::execDICT_LOCK_CONF); + addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF); + apiConnectRecord = 0; connectRecord = 0; fileRecord = 0; diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index dc93dca4e46..620f7aeca85 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -545,7 +546,7 @@ void Dbdih::execCONTINUEB(Signal* signal) break; case DihContinueB::ZSTART_PERMREQ_AGAIN: jam(); - nodeRestartPh2Lab(signal); + nodeRestartPh2Lab2(signal); return; break; case DihContinueB::SwitchReplica: @@ -1295,6 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) case NodeState::ST_INITIAL_NODE_RESTART: case NodeState::ST_NODE_RESTART: jam(); + /*********************************************************************** * When starting nodes while system is operational we must be controlled * by the master since only one node restart is allowed at a time. 
@@ -1305,7 +1307,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) req->startingRef = reference(); req->startingVersion = 0; // Obsolete sendSignal(cmasterdihref, GSN_START_MEREQ, signal, - StartMeReq::SignalLength, JBB); + StartMeReq::SignalLength, JBB); return; } ndbrequire(false); @@ -1365,6 +1367,24 @@ void Dbdih::execNDB_STTOR(Signal* signal) } ndbrequire(false); break; + case ZNDB_SPH7: + jam(); + switch (typestart) { + case NodeState::ST_INITIAL_START: + case NodeState::ST_SYSTEM_RESTART: + jam(); + ndbsttorry10Lab(signal, __LINE__); + return; + case NodeState::ST_NODE_RESTART: + case NodeState::ST_INITIAL_NODE_RESTART: + jam(); + sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart); + c_dictLockSlavePtrI_nodeRestart = RNIL; + ndbsttorry10Lab(signal, __LINE__); + return; + } + ndbrequire(false); + break; default: jam(); ndbsttorry10Lab(signal, __LINE__); @@ -1574,6 +1594,34 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) /* START NODE LOGIC FOR NODE RESTART */ /*---------------------------------------------------------------------------*/ void Dbdih::nodeRestartPh2Lab(Signal* signal) +{ + /* + * Lock master DICT to avoid metadata operations during INR/NR. + * Done just before START_PERMREQ. + * + * It would be more elegant to do this just before START_MEREQ. + * The problem is, on INR we end up in massive invalidateNodeLCP + * which is not fully protected against metadata ops. 
+ */ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + + // check that we are not yet taking part in schema ops + CRASH_INSERTION(7174); + + Uint32 lockType = DictLockReq::NodeRestartLock; + Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 }; + sendDictLockReq(signal, lockType, c); +} + +void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret) +{ + ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL); + c_dictLockSlavePtrI_nodeRestart = data; + + nodeRestartPh2Lab2(signal); +} + +void Dbdih::nodeRestartPh2Lab2(Signal* signal) { /*------------------------------------------------------------------------*/ // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY @@ -1585,7 +1633,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) req->nodeId = cownNodeId; req->startType = cstarttype; sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB); -}//Dbdih::nodeRestartPh2Lab() +} void Dbdih::execSTART_PERMCONF(Signal* signal) { @@ -1710,12 +1758,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) const BlockReference retRef = req->blockRef; const Uint32 nodeId = req->nodeId; const Uint32 typeStart = req->startType; - CRASH_INSERTION(7122); ndbrequire(isMaster()); ndbrequire(refToNode(retRef) == nodeId); if ((c_nodeStartMaster.activeState) || - (c_nodeStartMaster.wait != ZFALSE)) { + (c_nodeStartMaster.wait != ZFALSE) || + ERROR_INSERTED_CLEAR(7175)) { jam(); signal->theData[0] = nodeId; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; @@ -10780,6 +10828,10 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_waiting); break; + case GCP_READY: // shut up lint + case GCP_PREPARE_SENT: + case GCP_COMMIT_SENT: + break; } ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", @@ -14978,3 +15030,122 @@ Dbdih::NodeRecord::NodeRecord(){ copyCompleted = false; allowNodeStart = true; } + +// DICT lock slave + +void 
+Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c) +{ + DictLockReq* req = (DictLockReq*)&signal->theData[0]; + DictLockSlavePtr lockPtr; + + c_dictLockSlavePool.seize(lockPtr); + ndbrequire(lockPtr.i != RNIL); + + req->userPtr = lockPtr.i; + req->lockType = lockType; + req->userRef = reference(); + + lockPtr.p->lockPtr = RNIL; + lockPtr.p->lockType = lockType; + lockPtr.p->locked = false; + lockPtr.p->callback = c; + + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + const unsigned int get_major = getMajor(masterVersion); + const unsigned int get_minor = getMinor(masterVersion); + const unsigned int get_build = getBuild(masterVersion); + ndbrequire(get_major >= 4); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + masterVersion < NDBD_DICT_LOCK_VERSION_5_1 && + get_major == 5 && get_minor == 1 || + ERROR_INSERTED(7176)) { + jam(); + + infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u", + (unsigned int)cmasterNodeId, get_major, get_minor, get_build); + + DictLockConf* conf = (DictLockConf*)&signal->theData[0]; + conf->userPtr = lockPtr.i; + conf->lockType = lockType; + conf->lockPtr = ZNIL; + + sendSignal(reference(), GSN_DICT_LOCK_CONF, signal, + DictLockConf::SignalLength, JBB); + return; + } + } + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal, + DictLockReq::SignalLength, JBB); +} + +void +Dbdih::execDICT_LOCK_CONF(Signal* signal) +{ + jamEntry(); + recvDictLockConf(signal); +} + +void +Dbdih::execDICT_LOCK_REF(Signal* signal) +{ + jamEntry(); + ndbrequire(false); +} + +void +Dbdih::recvDictLockConf(Signal* signal) +{ + const DictLockConf* conf = (const DictLockConf*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr); + + lockPtr.p->lockPtr = conf->lockPtr; + ndbrequire(lockPtr.p->lockType == conf->lockType); + ndbrequire(lockPtr.p->locked == false); 
+ lockPtr.p->locked = true; + + lockPtr.p->callback.m_callbackData = lockPtr.i; + execute(signal, lockPtr.p->callback, 0); +} + +void +Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI) +{ + DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0]; + + DictLockSlavePtr lockPtr; + c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI); + + ord->lockPtr = lockPtr.p->lockPtr; + ord->lockType = lockPtr.p->lockType; + + c_dictLockSlavePool.release(lockPtr); + + // handle rolling upgrade + { + Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version; + + const unsigned int get_major = getMajor(masterVersion); + const unsigned int get_minor = getMinor(masterVersion); + ndbrequire(get_major >= 4); + + if (masterVersion < NDBD_DICT_LOCK_VERSION_5 || + masterVersion < NDBD_DICT_LOCK_VERSION_5_1 && + get_major == 5 && get_minor == 1 || + ERROR_INSERTED(7176)) { + return; + } + } + + BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId); + sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, + DictUnlockOrd::SignalLength, JBB); +} diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp index b51cc02a703..878fe77d998 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp @@ -63,7 +63,7 @@ Dbtup::tuxAllocNode(Signal* signal, if ((ptr= alloc_fix_rec(fragPtr.p, tablePtr.p, &key, &frag_page_id)) == 0) { ljam(); - ndbrequire(terrorCode != 0); + terrorCode = ZMEM_NOMEM_ERROR; // caller sets error return terrorCode; } pageId= key.m_page_no; diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index ebc40cb385d..1eac369ec65 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) { jam(); CRASH_INSERTION(932); - BaseString::snprintf(buf, 100, "Node %u 
disconected", nodeId); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } @@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ndbrequire(false); case ZAPI_INACTIVE: { - BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } diff --git a/storage/ndb/src/kernel/main.cpp b/storage/ndb/src/kernel/main.cpp index b380a4e74c3..2eb6cf4d869 100644 --- a/storage/ndb/src/kernel/main.cpp +++ b/storage/ndb/src/kernel/main.cpp @@ -420,6 +420,10 @@ int main(int argc, char** argv) FILE * signalLog = fopen(buf, "a"); globalSignalLoggers.setOwnNodeId(globalData.ownId); globalSignalLoggers.setOutputStream(signalLog); +#if 0 // to log startup + globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH"); + globalData.testOn = 1; +#endif #endif catchsigs(false); diff --git a/storage/ndb/src/kernel/vm/pc.hpp b/storage/ndb/src/kernel/vm/pc.hpp index e6efad65779..6b9b563aa27 100644 --- a/storage/ndb/src/kernel/vm/pc.hpp +++ b/storage/ndb/src/kernel/vm/pc.hpp @@ -125,11 +125,13 @@ #ifdef ERROR_INSERT #define ERROR_INSERT_VARIABLE UintR cerrorInsert #define ERROR_INSERTED(x) (cerrorInsert == (x)) +#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? 
(cerrorInsert = 0, true) : false) #define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x #define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0 #else #define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used #define ERROR_INSERTED(x) false +#define ERROR_INSERTED_CLEAR(x) false #define SET_ERROR_INSERT_VALUE(x) #define CLEAR_ERROR_INSERT_VALUE #endif diff --git a/storage/ndb/src/ndbapi/ndberror.c b/storage/ndb/src/ndbapi/ndberror.c index b72ce4bd3c6..4816cda1c74 100644 --- a/storage/ndb/src/ndbapi/ndberror.c +++ b/storage/ndb/src/ndbapi/ndberror.c @@ -214,6 +214,7 @@ ErrorBundle ErrorCodes[] = { * OverloadError */ { 701, DMEC, OL, "System busy with other schema operation" }, + { 711, DMEC, OL, "System busy with node restart, schema operations not allowed" }, { 410, DMEC, OL, "REDO log files overloaded, consult online manual (decrease TimeBetweenLocalCheckpoints, and|or increase NoOfFragmentLogFiles)" }, { 677, DMEC, OL, "Index UNDO buffers overloaded (increase UndoIndexBuffer)" }, { 891, DMEC, OL, "Data UNDO buffers overloaded (increase UndoDataBuffer)" }, diff --git a/storage/ndb/test/ndbapi/testDict.cpp b/storage/ndb/test/ndbapi/testDict.cpp index b015e16bae1..f6277484b04 100644 --- a/storage/ndb/test/ndbapi/testDict.cpp +++ b/storage/ndb/test/ndbapi/testDict.cpp @@ -1644,6 +1644,302 @@ end: return result; } +// NFNR + +// Restarter controls dict ops : 1-run 2-pause 3-stop +// synced by polling... 
+ +static bool +send_dict_ops_cmd(NDBT_Context* ctx, Uint32 cmd) +{ + ctx->setProperty("DictOps_CMD", cmd); + while (1) { + if (ctx->isTestStopped()) + return false; + if (ctx->getProperty("DictOps_ACK") == cmd) + break; + NdbSleep_MilliSleep(100); + } + return true; +} + +static bool +recv_dict_ops_run(NDBT_Context* ctx) +{ + while (1) { + if (ctx->isTestStopped()) + return false; + Uint32 cmd = ctx->getProperty("DictOps_CMD"); + ctx->setProperty("DictOps_ACK", cmd); + if (cmd == 1) + break; + if (cmd == 3) + return false; + NdbSleep_MilliSleep(100); + } + return true; +} + +int +runRestarts(NDBT_Context* ctx, NDBT_Step* step) +{ + static int errlst_master[] = { // non-crashing + 7175, // send one fake START_PERMREF + 0 + }; + static int errlst_node[] = { + 7174, // crash before sending DICT_LOCK_REQ + 7176, // pretend master does not support DICT lock + 7121, // crash at receive START_PERMCONF + 0 + }; + const uint errcnt_master = sizeof(errlst_master)/sizeof(errlst_master[0]); + const uint errcnt_node = sizeof(errlst_node)/sizeof(errlst_node[0]); + + myRandom48Init(NdbTick_CurrentMillisecond()); + NdbRestarter restarter; + int result = NDBT_OK; + const int loops = ctx->getNumLoops(); + + for (int l = 0; l < loops && result == NDBT_OK; l++) { + g_info << "1: === loop " << l << " ===" << endl; + + // assuming 2-way replicated + + int numnodes = restarter.getNumDbNodes(); + CHECK(numnodes >= 1); + if (numnodes == 1) + break; + + int masterNodeId = restarter.getMasterNodeId(); + CHECK(masterNodeId != -1); + + // for more complex cases need more restarter support methods + + int nodeIdList[2] = { 0, 0 }; + int nodeIdCnt = 0; + + if (numnodes >= 2) { + int rand = myRandom48(numnodes); + int nodeId = restarter.getRandomNotMasterNodeId(rand); + CHECK(nodeId != -1); + nodeIdList[nodeIdCnt++] = nodeId; + } + + if (numnodes >= 4 && myRandom48(2) == 0) { + int rand = myRandom48(numnodes); + int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand); + 
CHECK(nodeId != -1); + if (nodeId != masterNodeId) + nodeIdList[nodeIdCnt++] = nodeId; + } + + g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl; + + const uint timeout = 60; //secs for node wait + const unsigned maxsleep = 2000; //ms + + bool NF_ops = ctx->getProperty("Restart_NF_ops"); + uint NF_type = ctx->getProperty("Restart_NF_type"); + bool NR_ops = ctx->getProperty("Restart_NR_ops"); + bool NR_error = ctx->getProperty("Restart_NR_error"); + + g_info << "1: " << (NF_ops ? "run" : "pause") << " dict ops" << endl; + if (! send_dict_ops_cmd(ctx, NF_ops ? 1 : 2)) + break; + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + { + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; + + bool nostart = true; + bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2); + bool initial = myRandom48(2); + + char flags[40]; + strcpy(flags, "flags: nostart"); + if (abort) + strcat(flags, ",abort"); + if (initial) + strcat(flags, ",initial"); + + g_info << "1: restart " << nodeId << " " << flags << endl; + CHECK(restarter.restartOneDbNode(nodeId, initial, nostart, abort) == 0); + } + } + + g_info << "1: wait for nostart" << endl; + CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt, timeout) == 0); + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + int err_master = 0; + int err_node[2] = { 0, 0 }; + + if (NR_error) { + err_master = errlst_master[l % errcnt_master]; + + // limitation: cannot have 2 node restarts and crash_insert + // one node may die for real (NF during startup) + + for (int i = 0; i < nodeIdCnt && nodeIdCnt == 1; i++) { + err_node[i] = errlst_node[l % errcnt_node]; + + // 7176 - no DICT lock protection + + if (err_node[i] == 7176) { + g_info << "1: no dict ops due to error insert " + << err_node[i] << endl; + NR_ops = false; + } + } + } + + g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl; + if (! send_dict_ops_cmd(ctx, NR_ops ? 
1 : 2)) + break; + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: start nodes" << endl; + CHECK(restarter.startNodes(nodeIdList, nodeIdCnt) == 0); + + if (NR_error) { + { + int err = err_master; + if (err != 0) { + g_info << "1: insert master error " << err << endl; + CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0); + } + } + + for (int i = 0; i < nodeIdCnt; i++) { + int nodeId = nodeIdList[i]; + + int err = err_node[i]; + if (err != 0) { + g_info << "1: insert node " << nodeId << " error " << err << endl; + CHECK(restarter.insertErrorInNode(nodeId, err) == 0); + } + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: wait cluster started" << endl; + CHECK(restarter.waitClusterStarted(timeout) == 0); + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "1: restart done" << endl; + } + + g_info << "1: stop dict ops" << endl; + send_dict_ops_cmd(ctx, 3); + + return result; +} + +int +runDictOps(NDBT_Context* ctx, NDBT_Step* step) +{ + myRandom48Init(NdbTick_CurrentMillisecond()); + int result = NDBT_OK; + + for (int l = 0; result == NDBT_OK; l++) { + if (! recv_dict_ops_run(ctx)) + break; + + g_info << "2: === loop " << l << " ===" << endl; + + Ndb* pNdb = GETNDB(step); + NdbDictionary::Dictionary* pDic = pNdb->getDictionary(); + const NdbDictionary::Table* pTab = ctx->getTab(); + //const char* tabName = pTab->getName(); //XXX what goes on? 
+ char tabName[40]; + strcpy(tabName, pTab->getName()); + + const unsigned long maxsleep = 100; //ms + + g_info << "2: create table" << endl; + { + uint count = 0; + try_create: + count++; + if (pDic->createTable(*pTab) != 0) { + const NdbError err = pDic->getNdbError(); + if (count == 1) + g_err << "2: " << tabName << ": create failed: " << err << endl; + if (err.code != 711) { + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + goto try_create; + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: verify create" << endl; + const NdbDictionary::Table* pTab2 = pDic->getTable(tabName); + if (pTab2 == NULL) { + const NdbError err = pDic->getNdbError(); + g_err << "2: " << tabName << ": verify create: " << err << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + // replace by the Retrieved table + pTab = pTab2; + + int records = ctx->getNumRecords(); + g_info << "2: load " << records << " records" << endl; + HugoTransactions hugoTrans(*pTab); + if (hugoTrans.loadTable(pNdb, records) != 0) { + // XXX get error code from hugo + g_err << "2: " << tabName << ": load failed" << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: drop" << endl; + { + uint count = 0; + try_drop: + count++; + if (pDic->dropTable(tabName) != 0) { + const NdbError err = pDic->getNdbError(); + if (count == 1) + g_err << "2: " << tabName << ": drop failed: " << err << endl; + if (err.code != 711) { + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + goto try_drop; + } + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + + g_info << "2: verify drop" << endl; + const NdbDictionary::Table* pTab3 = pDic->getTable(tabName); + if (pTab3 != NULL) { + g_err << "2: " << tabName << ": verify drop: table exists" << endl; + result = NDBT_FAILED; + break; + } + if (pDic->getNdbError().code != 709 && + pDic->getNdbError().code != 723) { + 
const NdbError err = pDic->getNdbError(); + g_err << "2: " << tabName << ": verify drop: " << err << endl; + result = NDBT_FAILED; + break; + } + NdbSleep_MilliSleep(myRandom48(maxsleep)); + } + + return result; +} + NDBT_TESTSUITE(testDict); TESTCASE("CreateAndDrop", "Try to create and drop the table loop number of times\n"){ @@ -1757,6 +2053,34 @@ TESTCASE("FailAddFragment", "Fail add fragment or attribute in ACC or TUP or TUX\n"){ INITIALIZER(runFailAddFragment); } +TESTCASE("Restart_NF1", + "DICT ops during node graceful shutdown (not master)"){ + TC_PROPERTY("Restart_NF_ops", 1); + TC_PROPERTY("Restart_NF_type", 1); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NF2", + "DICT ops during node shutdown abort (not master)"){ + TC_PROPERTY("Restart_NF_ops", 1); + TC_PROPERTY("Restart_NF_type", 2); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NR1", + "DICT ops during node startup (not master)"){ + TC_PROPERTY("Restart_NR_ops", 1); + STEP(runRestarts); + STEP(runDictOps); +} +TESTCASE("Restart_NR2", + "DICT ops during node startup with crash inserts (not master)"){ + TC_PROPERTY("Restart_NR_ops", 1); + TC_PROPERTY("Restart_NR_error", 1); + STEP(runRestarts); + STEP(runDictOps); +} + NDBT_TESTSUITE_END(testDict); int main(int argc, const char** argv){ diff --git a/storage/ndb/test/src/NdbRestarter.cpp b/storage/ndb/test/src/NdbRestarter.cpp index 2c16a05240d..b25c42ec18e 100644 --- a/storage/ndb/test/src/NdbRestarter.cpp +++ b/storage/ndb/test/src/NdbRestarter.cpp @@ -329,7 +329,10 @@ NdbRestarter::waitNodesState(int * _nodes, int _num_nodes, } g_info << "State node " << ndbNode->node_id << " " - << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl; + << ndb_mgm_get_node_status_string(ndbNode->node_status); + if (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING) + g_info<< ", start_phase=" << ndbNode->start_phase; + g_info << endl; assert(ndbNode != NULL); diff --git a/support-files/mysql.server.sh 
b/support-files/mysql.server.sh index 9e507bbe2b5..459699988ab 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -98,6 +98,11 @@ PATH=/sbin:/usr/sbin:/bin:/usr/bin:$basedir/bin export PATH mode=$1 # start or stop +shift +other_args="$*" # uncommon, but needed when called from an RPM upgrade action + # Expected: "--skip-networking --skip-grant-tables" + # They are not checked here, intentionally, as it is the responsibility + # of the "spec" file author to give correct arguments only. case `echo "testing\c"`,`echo -n testing` in *c*,-n*) echo_n= echo_c= ;; @@ -264,6 +269,11 @@ case "$mode" in echo $echo_n "Starting MySQL" if test -x $manager -a "$use_mysqld_safe" = "0" then + if test -n "$other_args" + then + log_failure_msg "MySQL manager does not support options '$other_args'" + exit 1 + fi # Give extra arguments to mysqld with the my.cnf file. This script may # be overwritten at next upgrade. "$manager" \ @@ -282,7 +292,7 @@ case "$mode" in # Give extra arguments to mysqld with the my.cnf file. This script # may be overwritten at next upgrade. pid_file=$server_pid_file - $bindir/mysqld_safe --datadir=$datadir --pid-file=$server_pid_file >/dev/null 2>&1 & + $bindir/mysqld_safe --datadir=$datadir --pid-file=$server_pid_file $other_args >/dev/null 2>&1 & wait_for_pid created # Make lock for RedHat / SuSE @@ -330,8 +340,8 @@ case "$mode" in 'restart') # Stop the service and regardless of whether it was # running or not, start it again. 
- $0 stop - $0 start + $0 stop $other_args + $0 start $other_args ;; 'reload') @@ -346,7 +356,7 @@ case "$mode" in *) # usage - echo "Usage: $0 start|stop|restart|reload" + echo "Usage: $0 {start|stop|restart|reload} [ MySQL server options ]" exit 1 ;; esac diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index 4fb271c6c02..501403a0880 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -466,7 +466,16 @@ chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir %{_bindir}/mysql_install_db --rpm --user=%{mysqld_user} # Upgrade databases if needed -%{_bindir}/mysql_upgrade --user=%{mysqld_user} +# This must be done as database user "root", who should be password-protected, +# but this password is not available here. +# So ensure the server is isolated as much as possible, and start it so that +# passwords are not checked. +# See the related change in the start script "/etc/init.d/mysql". +chmod 700 $mysql_datadir +%{_sysconfdir}/init.d/mysql start --skip-networking --skip-grant-tables +%{_bindir}/mysql_upgrade +%{_sysconfdir}/init.d/mysql stop --skip-networking --skip-grant-tables +chmod 755 $mysql_datadir # Change permissions again to fix any new files. chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir @@ -679,6 +688,11 @@ fi # itself - note that they must be ordered by date (important when # merging BK trees) %changelog +* Tue Jun 20 2006 Joerg Bruehe + +- To run "mysql_upgrade", we need a running server; + start it in isolation and skip password checks. + * Sat May 20 2006 Kent Boortz - Always compile for PIC, position independent code.