1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

Many files:

Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
mysqld.cc:
  Change MySQL default isolation level to REPEATABLE READ; note that InnoDB has always had that default, and BDB and MyISAM always run at SERIALIZABLE level anyway


sql/mysqld.cc:
  Change MySQL default isolation level to REPEATABLE READ; note that InnoDB has always had that default, and BDB and MyISAM always run at SERIALIZABLE level anyway
sql/ha_innodb.cc:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
sql/ha_innodb.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/buf0buf.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/dict0dict.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/fil0fil.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/lock0lock.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/os0file.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/os0proc.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/os0thread.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/page0cur.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/page0page.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/read0read.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/rem0rec.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/srv0srv.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/sync0rw.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/sync0sync.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/trx0purge.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/trx0trx.h:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/include/rem0rec.ic:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/btr/btr0btr.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/btr/btr0cur.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/btr/btr0pcur.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/buf/buf0buf.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/buf/buf0flu.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/dict/dict0dict.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/fil/fil0fil.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/fsp/fsp0fsp.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/ibuf/ibuf0ibuf.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/lock/lock0lock.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/mem/mem0dbg.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/os/os0file.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/os/os0proc.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/page/page0cur.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/page/page0page.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/pars/lexyy.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/pars/pars0grm.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/read/read0read.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0ins.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0mysql.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0purge.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0sel.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0uins.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0undo.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/row/row0upd.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/srv/srv0srv.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/srv/srv0start.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/sync/sync0rw.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/sync/sync0sync.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/trx/trx0purge.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
innobase/trx/trx0trx.c:
  Merge InnoDB-4.0.5: new isolation levels READ COMMITTED and READ UNCOMMITTED now supported, selective deadlock resolution
This commit is contained in:
unknown
2002-10-29 23:16:46 +02:00
parent 2d9a473bb6
commit 3cb98f0d66
51 changed files with 1577 additions and 580 deletions

View File

@ -274,6 +274,7 @@ btr_page_create(
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
page_create(page, mtr); page_create(page, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
btr_page_set_index_id(page, tree->id, mtr); btr_page_set_index_id(page, tree->id, mtr);
} }
@ -713,6 +714,7 @@ btr_create(
/* Create a new index page on the allocated segment page */ /* Create a new index page on the allocated segment page */
page = page_create(frame, mtr); page = page_create(frame, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Set the index id of the page */ /* Set the index id of the page */
btr_page_set_index_id(page, index_id, mtr); btr_page_set_index_id(page, index_id, mtr);
@ -847,6 +849,7 @@ btr_page_reorganize_low(
segment headers, next page-field, etc.) is preserved intact */ segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr); page_create(page, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the recreated page; /* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */ do not copy the lock bits yet */
@ -919,6 +922,7 @@ btr_page_empty(
segment headers, next page-field, etc.) is preserved intact */ segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr); page_create(page, mtr);
buf_block_align(page)->check_index_page_at_flush = TRUE;
} }
/***************************************************************** /*****************************************************************

View File

@ -121,16 +121,19 @@ btr_cur_latch_leaves(
{ {
ulint left_page_no; ulint left_page_no;
ulint right_page_no; ulint right_page_no;
page_t* get_page;
ut_ad(tree && page && mtr); ut_ad(tree && page && mtr);
if (latch_mode == BTR_SEARCH_LEAF) { if (latch_mode == BTR_SEARCH_LEAF) {
btr_page_get(space, page_no, RW_S_LATCH, mtr); get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else if (latch_mode == BTR_MODIFY_LEAF) { } else if (latch_mode == BTR_MODIFY_LEAF) {
btr_page_get(space, page_no, RW_X_LATCH, mtr); get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else if (latch_mode == BTR_MODIFY_TREE) { } else if (latch_mode == BTR_MODIFY_TREE) {
@ -138,15 +141,22 @@ btr_cur_latch_leaves(
left_page_no = btr_page_get_prev(page, mtr); left_page_no = btr_page_get_prev(page, mtr);
if (left_page_no != FIL_NULL) { if (left_page_no != FIL_NULL) {
btr_page_get(space, left_page_no, RW_X_LATCH, mtr); get_page = btr_page_get(space, left_page_no,
RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush =
TRUE;
} }
btr_page_get(space, page_no, RW_X_LATCH, mtr); get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
right_page_no = btr_page_get_next(page, mtr); right_page_no = btr_page_get_next(page, mtr);
if (right_page_no != FIL_NULL) { if (right_page_no != FIL_NULL) {
btr_page_get(space, right_page_no, RW_X_LATCH, mtr); get_page = btr_page_get(space, right_page_no,
RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush =
TRUE;
} }
} else if (latch_mode == BTR_SEARCH_PREV) { } else if (latch_mode == BTR_SEARCH_PREV) {
@ -157,9 +167,12 @@ btr_cur_latch_leaves(
if (left_page_no != FIL_NULL) { if (left_page_no != FIL_NULL) {
cursor->left_page = btr_page_get(space, left_page_no, cursor->left_page = btr_page_get(space, left_page_no,
RW_S_LATCH, mtr); RW_S_LATCH, mtr);
buf_block_align(
cursor->left_page)->check_index_page_at_flush = TRUE;
} }
btr_page_get(space, page_no, RW_S_LATCH, mtr); get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else if (latch_mode == BTR_MODIFY_PREV) { } else if (latch_mode == BTR_MODIFY_PREV) {
@ -169,9 +182,12 @@ btr_cur_latch_leaves(
if (left_page_no != FIL_NULL) { if (left_page_no != FIL_NULL) {
cursor->left_page = btr_page_get(space, left_page_no, cursor->left_page = btr_page_get(space, left_page_no,
RW_X_LATCH, mtr); RW_X_LATCH, mtr);
buf_block_align(
cursor->left_page)->check_index_page_at_flush = TRUE;
} }
btr_page_get(space, page_no, RW_X_LATCH, mtr); get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
} else { } else {
ut_error; ut_error;
} }
@ -274,6 +290,7 @@ btr_cur_search_to_nth_level(
if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate && !estimate
&& mode != PAGE_CUR_LE_OR_EXTENDS
&& btr_search_guess_on_hash(index, info, tuple, mode, && btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor, latch_mode, cursor,
has_search_latch, mtr)) { has_search_latch, mtr)) {
@ -334,12 +351,18 @@ btr_cur_search_to_nth_level(
rw_latch = RW_NO_LATCH; rw_latch = RW_NO_LATCH;
buf_mode = BUF_GET; buf_mode = BUF_GET;
/* We use these modified search modes on non-leaf levels of the
B-tree. These let us end up in the right B-tree leaf. In that leaf
we use the original search mode. */
if (mode == PAGE_CUR_GE) { if (mode == PAGE_CUR_GE) {
page_mode = PAGE_CUR_L; page_mode = PAGE_CUR_L;
} else if (mode == PAGE_CUR_G) { } else if (mode == PAGE_CUR_G) {
page_mode = PAGE_CUR_LE; page_mode = PAGE_CUR_LE;
} else if (mode == PAGE_CUR_LE) { } else if (mode == PAGE_CUR_LE) {
page_mode = PAGE_CUR_LE; page_mode = PAGE_CUR_LE;
} else if (mode == PAGE_CUR_LE_OR_EXTENDS) {
page_mode = PAGE_CUR_LE_OR_EXTENDS;
} else { } else {
ut_ad(mode == PAGE_CUR_L); ut_ad(mode == PAGE_CUR_L);
page_mode = PAGE_CUR_L; page_mode = PAGE_CUR_L;
@ -391,6 +414,8 @@ retry_page_get:
goto retry_page_get; goto retry_page_get;
} }
buf_block_align(page)->check_index_page_at_flush = TRUE;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
if (rw_latch != RW_NO_LATCH) { if (rw_latch != RW_NO_LATCH) {
buf_page_dbg_add_level(page, SYNC_TREE_NODE); buf_page_dbg_add_level(page, SYNC_TREE_NODE);
@ -543,6 +568,8 @@ btr_cur_open_at_index_side(
ut_ad(0 == ut_dulint_cmp(tree->id, ut_ad(0 == ut_dulint_cmp(tree->id,
btr_page_get_index_id(page))); btr_page_get_index_id(page)));
buf_block_align(page)->check_index_page_at_flush = TRUE;
if (height == ULINT_UNDEFINED) { if (height == ULINT_UNDEFINED) {
/* We are in the root node */ /* We are in the root node */

View File

@ -354,6 +354,7 @@ btr_pcur_move_to_next_page(
ut_ad(next_page_no != FIL_NULL); ut_ad(next_page_no != FIL_NULL);
next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
buf_block_align(next_page)->check_index_page_at_flush = TRUE;
btr_leaf_page_release(page, cursor->latch_mode, mtr); btr_leaf_page_release(page, cursor->latch_mode, mtr);

View File

@ -331,6 +331,11 @@ buf_page_print(
index->table_name, index->table_name,
index->name); index->name);
} }
} else if (fil_page_get_type(read_buf) == FIL_PAGE_INODE) {
fprintf(stderr, "InnoDB: Page may be an 'inode' page\n");
} else if (fil_page_get_type(read_buf) == FIL_PAGE_IBUF_FREE_LIST) {
fprintf(stderr,
"InnoDB: Page may be an insert buffer free list page\n");
} }
} }
@ -351,6 +356,8 @@ buf_block_init(
block->file_page_was_freed = FALSE; block->file_page_was_freed = FALSE;
block->check_index_page_at_flush = FALSE;
rw_lock_create(&(block->lock)); rw_lock_create(&(block->lock));
ut_ad(rw_lock_validate(&(block->lock))); ut_ad(rw_lock_validate(&(block->lock)));
@ -616,6 +623,29 @@ buf_page_peek_block(
return(block); return(block);
} }
/************************************************************************
Clears the check_index_page_at_flush flag of the block holding the given
page. The page is looked up in the buffer pool page hash while the buffer
pool mutex is held; if no block is found for it, nothing is changed. */
void
buf_reset_check_index_page_at_flush(
/*================================*/
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	buf_block_t*	hashed_block;

	mutex_enter_fast(&(buf_pool->mutex));

	hashed_block = buf_page_hash_get(space, offset);

	if (hashed_block != NULL) {
		/* The page is in the pool: clear the flag on its block */
		hashed_block->check_index_page_at_flush = FALSE;
	}

	mutex_exit(&(buf_pool->mutex));
}
/************************************************************************ /************************************************************************
Returns the current state of is_hashed of a page. FALSE if the page is Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the not in the pool. NOTE that this operation does not fix the page in the
@ -1185,6 +1215,8 @@ buf_page_init(
block->space = space; block->space = space;
block->offset = offset; block->offset = offset;
block->check_index_page_at_flush = FALSE;
block->lock_hash_val = lock_rec_hash(space, offset); block->lock_hash_val = lock_rec_hash(space, offset);
block->lock_mutex = NULL; block->lock_mutex = NULL;

View File

@ -15,6 +15,7 @@ Created 11/11/1995 Heikki Tuuri
#include "ut0byte.h" #include "ut0byte.h"
#include "ut0lst.h" #include "ut0lst.h"
#include "page0page.h"
#include "fil0fil.h" #include "fil0fil.h"
#include "buf0buf.h" #include "buf0buf.h"
#include "buf0lru.h" #include "buf0lru.h"
@ -225,6 +226,24 @@ buf_flush_buffered_writes(void)
return; return;
} }
for (i = 0; i < trx_doublewrite->first_free; i++) {
block = trx_doublewrite->buf_block_arr[i];
if (block->check_index_page_at_flush
&& !page_simple_validate(block->frame)) {
buf_page_print(block->frame);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Apparent corruption of an index page\n"
"InnoDB: to be written to data file. We intentionally crash server\n"
"InnoDB: to prevent corrupt data from ending up in data\n"
"InnoDB: files.\n");
ut_a(0);
}
}
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
} else { } else {

View File

@ -29,7 +29,14 @@ Created 1/8/1996 Heikki Tuuri
dict_sys_t* dict_sys = NULL; /* the dictionary system */ dict_sys_t* dict_sys = NULL; /* the dictionary system */
rw_lock_t dict_foreign_key_check_lock; rw_lock_t dict_operation_lock; /* table create, drop, etc. reserve
this in X-mode, implicit or background
operations purge, rollback, foreign
key checks reserve this in S-mode; we
cannot trust that MySQL protects
implicit or background operations
from dropping a table: this is our
mechanism */
#define DICT_HEAP_SIZE 100 /* initial memory heap size when #define DICT_HEAP_SIZE 100 /* initial memory heap size when
creating a table or index object */ creating a table or index object */
@ -509,9 +516,8 @@ dict_init(void)
UT_LIST_INIT(dict_sys->table_LRU); UT_LIST_INIT(dict_sys->table_LRU);
rw_lock_create(&dict_foreign_key_check_lock); rw_lock_create(&dict_operation_lock);
rw_lock_set_level(&dict_foreign_key_check_lock, rw_lock_set_level(&dict_operation_lock, SYNC_DICT_OPERATION);
SYNC_FOREIGN_KEY_CHECK);
} }
/************************************************************************** /**************************************************************************
@ -1851,14 +1857,14 @@ loop:
/************************************************************************* /*************************************************************************
Accepts a specified string. Comparisons are case-insensitive. */ Accepts a specified string. Comparisons are case-insensitive. */
static
char* char*
dict_accept( dict_accept(
/*========*/ /*========*/
/* out: if string was accepted, the pointer /* out: if string was accepted, the pointer
is moved after that, else ptr is returned */ is moved after that, else ptr is returned */
char* ptr, /* in: scan from this */ char* ptr, /* in: scan from this */
const char* string, /* in: accept only this string as the next const char* string,/* in: accept only this string as the next
non-whitespace string */ non-whitespace string */
ibool* success)/* out: TRUE if accepted */ ibool* success)/* out: TRUE if accepted */
{ {

View File

@ -967,6 +967,7 @@ fil_extend_last_data_file(
fil_node_t* node; fil_node_t* node;
fil_space_t* space; fil_space_t* space;
fil_system_t* system = fil_system; fil_system_t* system = fil_system;
byte* buf2;
byte* buf; byte* buf;
ibool success; ibool success;
ulint i; ulint i;
@ -981,19 +982,23 @@ fil_extend_last_data_file(
fil_node_prepare_for_io(node, system, space); fil_node_prepare_for_io(node, system, space);
buf = mem_alloc(1024 * 1024); buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
buf = ut_align(buf2, UNIV_PAGE_SIZE);
memset(buf, '\0', 1024 * 1024); memset(buf, '\0', 1024 * 1024);
for (i = 0; i < size_increase / ((1024 * 1024) / UNIV_PAGE_SIZE); i++) { for (i = 0; i < size_increase / ((1024 * 1024) / UNIV_PAGE_SIZE); i++) {
success = os_file_write(node->name, node->handle, buf, /* If we use native Windows aio, then also this write is
done using it */
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
(node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF, (node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF,
node->size >> (32 - UNIV_PAGE_SIZE_SHIFT), node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
1024 * 1024); 1024 * 1024, NULL, NULL);
if (!success) { if (!success) {
break; break;
} }
@ -1003,7 +1008,7 @@ fil_extend_last_data_file(
os_has_said_disk_full = FALSE; os_has_said_disk_full = FALSE;
} }
mem_free(buf); mem_free(buf2);
fil_node_complete_io(node, system, OS_FILE_WRITE); fil_node_complete_io(node, system, OS_FILE_WRITE);
@ -1528,7 +1533,6 @@ fil_page_set_type(
ulint type) /* in: type */ ulint type) /* in: type */
{ {
ut_ad(page); ut_ad(page);
ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_UNDO_LOG));
mach_write_to_2(page + FIL_PAGE_TYPE, type); mach_write_to_2(page + FIL_PAGE_TYPE, type);
} }

View File

@ -769,6 +769,8 @@ fsp_init_file_page_low(
#endif #endif
page = buf_frame_align(ptr); page = buf_frame_align(ptr);
buf_block_align(page)->check_index_page_at_flush = FALSE;
#ifdef UNIV_BASIC_LOG_DEBUG #ifdef UNIV_BASIC_LOG_DEBUG
/* printf("In log debug version: Erase the contents of the file page\n"); /* printf("In log debug version: Erase the contents of the file page\n");
*/ */
@ -1097,7 +1099,7 @@ fsp_fill_free_list(
/* Initialize the ibuf page in a separate /* Initialize the ibuf page in a separate
mini-transaction because it is low in the latching mini-transaction because it is low in the latching
order, and we must be able to release the its latch order, and we must be able to release its latch
before returning from the fsp routine */ before returning from the fsp routine */
mtr_start(&ibuf_mtr); mtr_start(&ibuf_mtr);
@ -1264,7 +1266,12 @@ fsp_alloc_free_page(
free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE, free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
hint % FSP_EXTENT_SIZE, mtr); hint % FSP_EXTENT_SIZE, mtr);
ut_a(free != ULINT_UNDEFINED); if (free == ULINT_UNDEFINED) {
ut_print_buf(((byte*)descr) - 500, 1000);
ut_a(0);
}
xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr); xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
@ -1412,7 +1419,12 @@ fsp_free_extent(
descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
ut_a(xdes_get_state(descr, mtr) != XDES_FREE); if (xdes_get_state(descr, mtr) == XDES_FREE) {
ut_print_buf(((byte*)descr) - 500, 1000);
ut_a(0);
}
xdes_init(descr, mtr); xdes_init(descr, mtr);
@ -1523,6 +1535,10 @@ fsp_alloc_seg_inode_page(
page = buf_page_get(space, page_no, RW_X_LATCH, mtr); page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
buf_block_align(page)->check_index_page_at_flush = FALSE;
fil_page_set_type(page, FIL_PAGE_INODE);
buf_page_dbg_add_level(page, SYNC_FSP_PAGE); buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) { for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
@ -2298,6 +2314,8 @@ fseg_alloc_free_page_low(
fseg_mark_page_used(seg_inode, space, ret_page, mtr); fseg_mark_page_used(seg_inode, space, ret_page, mtr);
} }
buf_reset_check_index_page_at_flush(space, ret_page);
return(ret_page); return(ret_page);
} }

View File

@ -1295,6 +1295,8 @@ ibuf_add_free_page(
flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
fil_page_set_type(page, FIL_PAGE_IBUF_FREE_LIST);
ibuf_data->seg_size++; ibuf_data->seg_size++;
ibuf_data->free_list_len++; ibuf_data->free_list_len++;
@ -1305,6 +1307,7 @@ ibuf_add_free_page(
ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
TRUE, &mtr); TRUE, &mtr);
mtr_commit(&mtr); mtr_commit(&mtr);
mutex_exit(&ibuf_mutex); mutex_exit(&ibuf_mutex);

View File

@ -274,6 +274,15 @@ buf_page_peek_block(
ulint space, /* in: space id */ ulint space, /* in: space id */
ulint offset);/* in: page number */ ulint offset);/* in: page number */
/************************************************************************ /************************************************************************
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
void
buf_reset_check_index_page_at_flush(
/*================================*/
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
Sets file_page_was_freed TRUE if the page is found in the buffer pool. Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless debug version to check that it is not accessed any more unless
@ -648,6 +657,14 @@ struct buf_block_struct{
then it can wait for this rw-lock */ then it can wait for this rw-lock */
buf_block_t* hash; /* node used in chaining to the page buf_block_t* hash; /* node used in chaining to the page
hash table */ hash table */
ibool check_index_page_at_flush;
/* TRUE if we know that this is
an index page, and want the database
to check its consistency before flush;
note that there may be pages in the
buffer pool which are index pages,
but this flag is not set because
we do not keep track of all pages */
/* 2. Page flushing fields */ /* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list; UT_LIST_NODE_T(buf_block_t) flush_list;

View File

@ -26,6 +26,18 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0byte.h" #include "ut0byte.h"
#include "trx0types.h" #include "trx0types.h"
/*************************************************************************
Accepts a specified string. Comparisons are case-insensitive. */
char*
dict_accept(
/*========*/
/* out: if string was accepted, the pointer
is moved after that, else ptr is returned */
char* ptr, /* in: scan from this */
const char* string,/* in: accept only this string as the next
non-whitespace string */
ibool* success);/* out: TRUE if accepted */
/************************************************************************ /************************************************************************
Decrements the count of open MySQL handles to a table. */ Decrements the count of open MySQL handles to a table. */
@ -798,7 +810,7 @@ dict_mutex_exit_for_mysql(void);
extern dict_sys_t* dict_sys; /* the dictionary system */ extern dict_sys_t* dict_sys; /* the dictionary system */
extern rw_lock_t dict_foreign_key_check_lock; extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */ /* Dictionary system struct */
struct dict_sys_struct{ struct dict_sys_struct{

View File

@ -73,6 +73,8 @@ extern fil_addr_t fil_addr_null;
/* File page types */ /* File page types */
#define FIL_PAGE_INDEX 17855 #define FIL_PAGE_INDEX 17855
#define FIL_PAGE_UNDO_LOG 2 #define FIL_PAGE_UNDO_LOG 2
#define FIL_PAGE_INODE 3
#define FIL_PAGE_IBUF_FREE_LIST 4
/* Space types */ /* Space types */
#define FIL_TABLESPACE 501 #define FIL_TABLESPACE 501

View File

@ -292,6 +292,27 @@ lock_sec_rec_modify_check_and_lock(
dict_index_t* index, /* in: secondary index */ dict_index_t* index, /* in: secondary index */
que_thr_t* thr); /* in: query thread */ que_thr_t* thr); /* in: query thread */
/************************************************************************* /*************************************************************************
Like the counterpart for a clustered index below, but now we read a
secondary index record. */
ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: secondary index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then if the query thread should anyway be suspended for some reason; if not, then
@ -313,25 +334,8 @@ lock_clust_rec_read_check_and_lock(
ulint mode, /* in: mode of the lock which the read cursor ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */ latter is possible in SELECT FOR UPDATE */
que_thr_t* thr); /* in: query thread */ ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
/************************************************************************* LOCK_REC_NOT_GAP */
Like the counterpart for a clustered index above, but now we read a
secondary index record. */
ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: secondary index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
que_thr_t* thr); /* in: query thread */ que_thr_t* thr); /* in: query thread */
/************************************************************************* /*************************************************************************
Checks that a record is seen in a consistent read. */ Checks that a record is seen in a consistent read. */
@ -509,6 +513,7 @@ lock_validate(void);
extern lock_sys_t* lock_sys; extern lock_sys_t* lock_sys;
/* Lock modes and types */ /* Lock modes and types */
/* Basic modes */
#define LOCK_NONE 0 /* this flag is used elsewhere to note #define LOCK_NONE 0 /* this flag is used elsewhere to note
consistent read */ consistent read */
#define LOCK_IS 2 /* intention shared */ #define LOCK_IS 2 /* intention shared */
@ -519,15 +524,20 @@ extern lock_sys_t* lock_sys;
in an exclusive mode */ in an exclusive mode */
#define LOCK_MODE_MASK 0xF /* mask used to extract mode from the #define LOCK_MODE_MASK 0xF /* mask used to extract mode from the
type_mode field in a lock */ type_mode field in a lock */
/* Lock types */
#define LOCK_TABLE 16 /* these type values should be so high that */ #define LOCK_TABLE 16 /* these type values should be so high that */
#define LOCK_REC 32 /* they can be ORed to the lock mode */ #define LOCK_REC 32 /* they can be ORed to the lock mode */
#define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the #define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the
type_mode field in a lock */ type_mode field in a lock */
/* Waiting lock flag */
#define LOCK_WAIT 256 /* this wait bit should be so high that #define LOCK_WAIT 256 /* this wait bit should be so high that
it can be ORed to the lock mode and type; it can be ORed to the lock mode and type;
when this bit is set, it means that the when this bit is set, it means that the
lock has not yet been granted, it is just lock has not yet been granted, it is just
waiting for its turn in the wait queue */ waiting for its turn in the wait queue */
/* Precise modes */
#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock
in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
#define LOCK_GAP 512 /* this gap bit should be so high that #define LOCK_GAP 512 /* this gap bit should be so high that
it can be ORed to the other flags; it can be ORed to the other flags;
when this bit is set, it means that the when this bit is set, it means that the
@ -537,7 +547,15 @@ extern lock_sys_t* lock_sys;
the bit is set; locks of this type are created the bit is set; locks of this type are created
when records are removed from the index chain when records are removed from the index chain
of records */ of records */
#define LOCK_INSERT_INTENTION 1024 /* this bit is set when we place a waiting #define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on
the index record and does NOT block inserts
to the gap before the index record; this is
used in the case when we retrieve a record
with a unique key, and is also used in
locking plain SELECTs (not part of UPDATE
or DELETE) when the user has set the READ
COMMITTED isolation level */
#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
gap type record lock request in order to let gap type record lock request in order to let
an insert of an index record to wait until an insert of an index record to wait until
there are no conflicting locks by other there are no conflicting locks by other

View File

@ -111,6 +111,7 @@ log. */
#define OS_WIN31 1 #define OS_WIN31 1
#define OS_WIN95 2 #define OS_WIN95 2
#define OS_WINNT 3 #define OS_WINNT 3
#define OS_WIN2000 4
extern ulint os_n_file_reads; extern ulint os_n_file_reads;
extern ulint os_n_file_writes; extern ulint os_n_file_writes;
@ -122,7 +123,7 @@ Gets the operating system version. Currently works only on Windows. */
ulint ulint
os_get_os_version(void); os_get_os_version(void);
/*===================*/ /*===================*/
/* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */ /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
/******************************************************************** /********************************************************************
Creates the seek mutexes used in positioned reads and writes. */ Creates the seek mutexes used in positioned reads and writes. */

View File

@ -15,6 +15,15 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t; typedef void* os_process_t;
typedef unsigned long int os_process_id_t; typedef unsigned long int os_process_id_t;
/********************************************************************
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
thread. That number is the same as one sees in 'top', for example. In Linux
the thread id is not the same as one sees in 'top'. */
ulint
os_proc_get_number(void);
/*====================*/
/******************************************************************** /********************************************************************
Allocates non-cacheable memory. */ Allocates non-cacheable memory. */

View File

@ -16,11 +16,8 @@ Created 9/8/1995 Heikki Tuuri
this is also the size of the wait slot array for MySQL threads which this is also the size of the wait slot array for MySQL threads which
can wait inside InnoDB */ can wait inside InnoDB */
#ifdef __WIN__ #ifdef __WIN__
/* Windows 95/98/ME seemed to have difficulties creating the all /* Create fewer event semaphores because Win 98/ME had difficulty creating
the event semaphores for the wait array slots. If the computer had 40000 event semaphores */
<= 64 MB memory, InnoDB startup could take minutes or even crash.
That is why we set this to only 1000 in Windows. */
#define OS_THREAD_MAX_N 1000 #define OS_THREAD_MAX_N 1000
#else #else
#define OS_THREAD_MAX_N 10000 #define OS_THREAD_MAX_N 10000

View File

@ -26,7 +26,12 @@ Created 10/4/1994 Heikki Tuuri
#define PAGE_CUR_GE 2 #define PAGE_CUR_GE 2
#define PAGE_CUR_L 3 #define PAGE_CUR_L 3
#define PAGE_CUR_LE 4 #define PAGE_CUR_LE 4
#define PAGE_CUR_DBG 5 #define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in
"column LIKE 'abc%' ORDER BY column DESC";
we have to find strings which are <= 'abc' or
which extend it */
#define PAGE_CUR_DBG 6
extern ulint page_cur_short_succ; extern ulint page_cur_short_succ;

View File

@ -666,6 +666,16 @@ page_rec_validate(
/* out: TRUE if ok */ /* out: TRUE if ok */
rec_t* rec); /* in: record on the page */ rec_t* rec); /* in: record on the page */
/******************************************************************* /*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage. */
ibool
page_simple_validate(
/*=================*/
/* out: TRUE if ok */
page_t* page); /* in: index page */
/*******************************************************************
This function checks the consistency of an index page. */ This function checks the consistency of an index page. */
ibool ibool

View File

@ -45,6 +45,14 @@ read_view_close(
/*============*/ /*============*/
read_view_t* view); /* in: read view */ read_view_t* view); /* in: read view */
/************************************************************************* /*************************************************************************
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
void
read_view_close_for_mysql(
/*======================*/
trx_t* trx); /* in: trx which has a read view */
/*************************************************************************
Checks if a read view sees the specified transaction. */ Checks if a read view sees the specified transaction. */
UNIV_INLINE UNIV_INLINE
ibool ibool

View File

@ -148,12 +148,22 @@ data field in the record. */
byte* byte*
rec_get_nth_field( rec_get_nth_field(
/*==============*/ /*==============*/
/* out: pointer to the field, NULL if SQL null */ /* out: pointer to the field */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
ulint n, /* in: index of the field */ ulint n, /* in: index of the field */
ulint* len); /* out: length of the field; UNIV_SQL_NULL ulint* len); /* out: length of the field; UNIV_SQL_NULL
if SQL null */ if SQL null */
/**************************************************************** /****************************************************************
Return field length or UNIV_SQL_NULL. */
UNIV_INLINE
ulint
rec_get_nth_field_len(
/*==================*/
/* out: length of the field; UNIV_SQL_NULL if SQL
null */
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
/****************************************************************
Gets the physical size of a field. Also an SQL null may have a field of Gets the physical size of a field. Also an SQL null may have a field of
size > 0, if the data type is of a fixed size. */ size > 0, if the data type is of a fixed size. */
UNIV_INLINE UNIV_INLINE

View File

@ -65,6 +65,24 @@ a field stored to another page: */
#define REC_2BYTE_EXTERN_MASK 0x4000 #define REC_2BYTE_EXTERN_MASK 0x4000
/****************************************************************
Looks up the nth field of a record and reports only its length; the
pointer to the field data is discarded. */
UNIV_INLINE
ulint
rec_get_nth_field_len(
/*==================*/
			/* out: length of the field; UNIV_SQL_NULL if SQL
			null */
	rec_t*	rec,	/* in: record */
	ulint	n)	/* in: index of the field */
{
	ulint	field_len;

	/* rec_get_nth_field() stores the length (or UNIV_SQL_NULL) in
	its out parameter; its return value is not needed here */

	(void) rec_get_nth_field(rec, n, &field_len);

	return(field_len);
}
/*************************************************************** /***************************************************************
Sets the value of the ith field SQL null bit. */ Sets the value of the ith field SQL null bit. */

View File

@ -57,8 +57,6 @@ extern ulint srv_flush_log_at_trx_commit;
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
character set */ character set */
extern ibool srv_use_native_aio;
extern ulint srv_pool_size; extern ulint srv_pool_size;
extern ulint srv_mem_pool_size; extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size; extern ulint srv_lock_table_size;
@ -70,8 +68,9 @@ extern dulint srv_archive_recovery_limit_lsn;
extern ulint srv_lock_wait_timeout; extern ulint srv_lock_wait_timeout;
extern char* srv_unix_file_flush_method_str; extern char* srv_file_flush_method_str;
extern ulint srv_unix_file_flush_method; extern ulint srv_unix_file_flush_method;
extern ulint srv_win_file_flush_method;
extern ulint srv_force_recovery; extern ulint srv_force_recovery;
extern ulint srv_thread_concurrency; extern ulint srv_thread_concurrency;
@ -154,13 +153,19 @@ typedef struct srv_sys_struct srv_sys_t;
/* The server system */ /* The server system */
extern srv_sys_t* srv_sys; extern srv_sys_t* srv_sys;
/* Alternatives for the field flush option in Unix; see the InnoDB manual about /* Alternatives for the file flush option in Unix; see the InnoDB manual about
what these mean */ what these mean */
#define SRV_UNIX_FDATASYNC 1 #define SRV_UNIX_FDATASYNC 1 /* This is the default; it is currently mapped
to a call of fsync() because fdatasync()
seemed to corrupt files in Linux and Solaris */
#define SRV_UNIX_O_DSYNC 2 #define SRV_UNIX_O_DSYNC 2
#define SRV_UNIX_LITTLESYNC 3 #define SRV_UNIX_LITTLESYNC 3
#define SRV_UNIX_NOSYNC 4 #define SRV_UNIX_NOSYNC 4
/* Alternatives for file i/o in Windows */
#define SRV_WIN_IO_NORMAL 1
#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */
/* Alternatives for srv_force_recovery. Non-zero values are intended /* Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact to help the user get a damaged database up so that he can dump intact
tables and rows with SELECT INTO OUTFILE. The database must not otherwise tables and rows with SELECT INTO OUTFILE. The database must not otherwise
@ -311,15 +316,17 @@ srv_conc_exit_innodb(
trx_t* trx); /* in: transaction object associated with the trx_t* trx); /* in: transaction object associated with the
thread */ thread */
/******************************************************************* /*******************************************************************
Puts a MySQL OS thread to wait for a lock to be released. */ Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
ibool void
srv_suspend_mysql_thread( srv_suspend_mysql_thread(
/*=====================*/ /*=====================*/
/* out: TRUE if the lock wait timeout was que_thr_t* thr); /* in: query thread associated with the MySQL
exceeded */ OS thread */
que_thr_t* thr); /* in: query thread associated with
the MySQL OS thread */
/************************************************************************ /************************************************************************
Releases a MySQL OS thread waiting for a lock to be released, if the Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */ thread is already suspended. */
@ -407,3 +414,4 @@ struct srv_sys_struct{
extern ulint srv_n_threads_active[]; extern ulint srv_n_threads_active[];
#endif #endif

View File

@ -335,7 +335,8 @@ ibool
rw_lock_own( rw_lock_own(
/*========*/ /*========*/
rw_lock_t* lock, /* in: rw-lock */ rw_lock_t* lock, /* in: rw-lock */
ulint lock_type); /* in: lock type */ ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
/********************************************************************** /**********************************************************************
Checks if somebody has locked the rw-lock in the specified mode. */ Checks if somebody has locked the rw-lock in the specified mode. */

View File

@ -371,10 +371,12 @@ or row lock! */
#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress #define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
latching order checking */ latching order checking */
#define SYNC_LEVEL_NONE 2000 /* default: level not defined */ #define SYNC_LEVEL_NONE 2000 /* default: level not defined */
#define SYNC_FOREIGN_KEY_CHECK 1001 #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
this in X-mode, implicit or background
operations purge, rollback, foreign
key checks reserve this in S-mode */
#define SYNC_DICT 1000 #define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999 #define SYNC_DICT_AUTOINC_MUTEX 999
#define SYNC_PURGE_IS_RUNNING 997
#define SYNC_DICT_HEADER 995 #define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914 #define SYNC_IBUF_HEADER 914
#define SYNC_IBUF_PESS_INSERT_MUTEX 912 #define SYNC_IBUF_PESS_INSERT_MUTEX 912

View File

@ -111,9 +111,6 @@ struct trx_purge_struct{
of the trx system and it never ends */ of the trx system and it never ends */
que_t* query; /* The query graph which will do the que_t* query; /* The query graph which will do the
parallelized purge operation */ parallelized purge operation */
rw_lock_t purge_is_running;/* Purge operation set an x-latch here
while it is accessing a table: this
prevents dropping of the table */
rw_lock_t latch; /* The latch protecting the purge view. rw_lock_t latch; /* The latch protecting the purge view.
A purge operation must acquire an A purge operation must acquire an
x-latch here for the instant at which x-latch here for the instant at which

View File

@ -327,6 +327,7 @@ struct trx_struct{
time_t start_time; /* time the trx object was created time_t start_time; /* time the trx object was created
or the state last time became or the state last time became
TRX_ACTIVE */ TRX_ACTIVE */
ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ibool check_foreigns; /* normally TRUE, but if the user ibool check_foreigns; /* normally TRUE, but if the user
wants to suppress foreign key checks, wants to suppress foreign key checks,
(in table imports, for example) we (in table imports, for example) we
@ -350,6 +351,9 @@ struct trx_struct{
/*------------------------------*/ /*------------------------------*/
void* mysql_thd; /* MySQL thread handle corresponding void* mysql_thd; /* MySQL thread handle corresponding
to this trx, or NULL */ to this trx, or NULL */
char** mysql_query_str;/* pointer to the field in mysqld_thd
which contains the pointer to the
current SQL query string */
char* mysql_log_file_name; char* mysql_log_file_name;
/* if MySQL binlog is used, this field /* if MySQL binlog is used, this field
contains a pointer to the latest file contains a pointer to the latest file
@ -371,6 +375,9 @@ struct trx_struct{
replication has processed */ replication has processed */
os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
with this transaction object */ with this transaction object */
ulint mysql_process_no;/* since in Linux, 'top' reports
process id's and not thread id's, we
store the process number too */
/*------------------------------*/ /*------------------------------*/
ulint n_mysql_tables_in_use; /* number of Innobase tables ulint n_mysql_tables_in_use; /* number of Innobase tables
used in the processing of the current used in the processing of the current
@ -379,9 +386,9 @@ struct trx_struct{
/* how many tables the current SQL /* how many tables the current SQL
statement uses, except those statement uses, except those
in consistent read */ in consistent read */
ibool has_dict_foreign_key_check_lock; ibool has_dict_operation_lock;
/* TRUE if the trx currently holds /* TRUE if the trx currently holds
an s-lock on dict_foreign_... */ an s-lock on dict_operation_lock */
ibool has_search_latch; ibool has_search_latch;
/* TRUE if this trx has latched the /* TRUE if this trx has latched the
search system latch in S-mode */ search system latch in S-mode */
@ -523,6 +530,41 @@ struct trx_struct{
#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */ #define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
#define TRX_QUE_COMMITTING 4 /* transaction is committing */ #define TRX_QUE_COMMITTING 4 /* transaction is committing */
/* Transaction isolation levels */
#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking
SELECTs are performed so that
we do not look at a possible
earlier version of a record;
thus they are not 'consistent'
reads under this isolation
level; otherwise like level
2 */
#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like
isolation, except that in
range UPDATE and DELETE we
must block phantom rows
with next-key locks;
SELECT ... FOR UPDATE and ...
LOCK IN SHARE MODE only lock
the index records, NOT the
gaps before them, and thus
allow free inserting;
each consistent read reads its
own snapshot */
#define TRX_ISO_REPEATABLE_READ 3 /* this is the default;
all consistent reads in the
same trx read the same
snapshot;
full next-key locking used
in locking reads to block
insertions into gaps */
#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are
converted to LOCK IN SHARE
MODE reads */
/* Types of a trx signal */ /* Types of a trx signal */
#define TRX_SIG_NO_SIGNAL 100 #define TRX_SIG_NO_SIGNAL 100
#define TRX_SIG_TOTAL_ROLLBACK 1 #define TRX_SIG_TOTAL_ROLLBACK 1

View File

@ -70,6 +70,11 @@ A waiting record lock can also be of the gap type. A waiting lock request
can be granted when there is no conflicting mode lock request by another can be granted when there is no conflicting mode lock request by another
transaction ahead of it in the explicit lock queue. transaction ahead of it in the explicit lock queue.
In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
It only locks the record it is placed on, not the gap before the record.
This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
level.
------------------------------------------------------------------------- -------------------------------------------------------------------------
RULE 1: If there is an implicit x-lock on a record, and there are non-gap RULE 1: If there is an implicit x-lock on a record, and there are non-gap
------- -------
@ -294,7 +299,9 @@ struct lock_struct{
UT_LIST_NODE_T(lock_t) UT_LIST_NODE_T(lock_t)
trx_locks; /* list of the locks of the trx_locks; /* list of the locks of the
transaction */ transaction */
ulint type_mode; /* lock type, mode, gap flag, and ulint type_mode; /* lock type, mode, LOCK_GAP or
LOCK_REC_NOT_GAP,
LOCK_INSERT_INTENTION,
wait flag, ORed */ wait flag, ORed */
hash_node_t hash; /* hash chain node for a record lock */ hash_node_t hash; /* hash chain node for a record lock */
dict_index_t* index; /* index for a record lock */ dict_index_t* index; /* index for a record lock */
@ -309,6 +316,10 @@ Monitor will then fetch it and print */
ibool lock_deadlock_found = FALSE; ibool lock_deadlock_found = FALSE;
char* lock_latest_err_buf; /* We allocate 5000 bytes for this */ char* lock_latest_err_buf; /* We allocate 5000 bytes for this */
/* Flags for recursive deadlock search */
#define LOCK_VICTIM_IS_START 1
#define LOCK_VICTIM_IS_OTHER 2
/************************************************************************ /************************************************************************
Checks if a lock request results in a deadlock. */ Checks if a lock request results in a deadlock. */
static static
@ -700,23 +711,23 @@ lock_rec_get_gap(
} }
/************************************************************************* /*************************************************************************
Sets the gap flag of a record lock. */ Gets the LOCK_REC_NOT_GAP flag of a record lock. */
UNIV_INLINE UNIV_INLINE
void ibool
lock_rec_set_gap( lock_rec_get_rec_not_gap(
/*=============*/ /*=====================*/
lock_t* lock, /* in: record lock */ /* out: TRUE if LOCK_REC_NOT_GAP flag set */
ibool val) /* in: value to set: TRUE or FALSE */ lock_t* lock) /* in: record lock */
{ {
ut_ad(lock); ut_ad(lock);
ut_ad((val == TRUE) || (val == FALSE));
ut_ad(lock_get_type(lock) == LOCK_REC); ut_ad(lock_get_type(lock) == LOCK_REC);
if (val) { if (lock->type_mode & LOCK_REC_NOT_GAP) {
lock->type_mode = lock->type_mode | LOCK_GAP;
} else { return(TRUE);
lock->type_mode = lock->type_mode & ~LOCK_GAP;
} }
return(FALSE);
} }
/************************************************************************* /*************************************************************************
@ -739,26 +750,6 @@ lock_rec_get_insert_intention(
return(FALSE); return(FALSE);
} }
/*************************************************************************
Sets the waiting insert flag of a record lock. */
UNIV_INLINE
void
lock_rec_set_insert_intention(
/*==========================*/
lock_t* lock, /* in: record lock */
ibool val) /* in: value to set: TRUE or FALSE */
{
ut_ad(lock);
ut_ad((val == TRUE) || (val == FALSE));
ut_ad(lock_get_type(lock) == LOCK_REC);
if (val) {
lock->type_mode = lock->type_mode | LOCK_INSERT_INTENTION;
} else {
lock->type_mode = lock->type_mode & ~LOCK_INSERT_INTENTION;
}
}
/************************************************************************* /*************************************************************************
Calculates if lock mode 1 is stronger or equal to lock mode 2. */ Calculates if lock mode 1 is stronger or equal to lock mode 2. */
UNIV_INLINE UNIV_INLINE
@ -848,47 +839,52 @@ lock_rec_has_to_wait(
/* out: TRUE if new lock has to wait for lock2 to be /* out: TRUE if new lock has to wait for lock2 to be
removed */ removed */
trx_t* trx, /* in: trx of new lock */ trx_t* trx, /* in: trx of new lock */
ulint mode, /* in: LOCK_S or LOCK_X */ ulint type_mode,/* in: precise mode of the new lock to set:
ulint gap, /* in: LOCK_GAP or 0 */ LOCK_S or LOCK_X, possibly ORed to
ulint insert_intention, LOCK_GAP or LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION */
/* in: LOCK_INSERT_INTENTION or 0 */
lock_t* lock2) /* in: another record lock; NOTE that it is assumed lock_t* lock2) /* in: another record lock; NOTE that it is assumed
that this has a lock bit set on the same record as that this has a lock bit set on the same record as
in lock1 */ in the new lock we are setting */
{ {
ut_ad(trx && lock2); ut_ad(trx && lock2);
ut_ad(lock_get_type(lock2) == LOCK_REC); ut_ad(lock_get_type(lock2) == LOCK_REC);
ut_ad(mode == LOCK_S || mode == LOCK_X);
ut_ad(gap == LOCK_GAP || gap == 0);
ut_ad(insert_intention == LOCK_INSERT_INTENTION
|| insert_intention == 0);
if (trx != lock2->trx && !lock_mode_compatible(mode, if (trx != lock2->trx
&& !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
lock_get_mode(lock2))) { lock_get_mode(lock2))) {
/* We have somewhat complex rules when gap type /* We have somewhat complex rules when gap type record locks
record locks cause waits */ cause waits */
if (!gap && lock_rec_get_insert_intention(lock2)) { if ((type_mode & LOCK_REC_NOT_GAP)
&& lock_rec_get_gap(lock2)) {
/* Request of a full next-key record does not /* Lock on just the record does not need to wait for
need to wait for an insert intention lock to be a gap type lock */
removed. This is ok since our rules allow conflicting
locks on gaps. This eliminates a spurious deadlock
caused by a next-key lock waiting for an insert
intention lock; when the insert intention lock was
granted, the insert deadlocked on the waiting
next-key lock. */
return(FALSE); return(FALSE);
} }
if (insert_intention && lock_rec_get_insert_intention(lock2)) { if ((type_mode & LOCK_GAP)
&& lock_rec_get_rec_not_gap(lock2)) {
/* An insert intention is not disturbed by another /* Lock on gap does not need to wait for
insert intention; this removes a spurious deadlock a LOCK_REC_NOT_GAP type lock */
caused by inserts which had to wait for a next-key
lock to be removed */ return(FALSE);
}
if (lock_rec_get_insert_intention(lock2)) {
/* No lock request needs to wait for an insert
intention lock to be removed. This is ok since our
rules allow conflicting locks on gaps. This eliminates
a spurious deadlock caused by a next-key lock waiting
for an insert intention lock; when the insert
intention lock was granted, the insert deadlocked on
the waiting next-key lock.
Also, insert intention locks do not disturb each
other. */
return(FALSE); return(FALSE);
} }
@ -921,10 +917,7 @@ lock_has_to_wait(
ut_ad(lock_get_type(lock2) == LOCK_REC); ut_ad(lock_get_type(lock2) == LOCK_REC);
return(lock_rec_has_to_wait(lock1->trx, return(lock_rec_has_to_wait(lock1->trx,
lock_get_mode(lock1), lock1->type_mode, lock2));
lock_rec_get_gap(lock1),
lock_rec_get_insert_intention(lock1),
lock2));
} }
return(TRUE); return(TRUE);
@ -1386,32 +1379,41 @@ lock_table_has(
/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
/************************************************************************* /*************************************************************************
Checks if a transaction has a GRANTED explicit lock on rec, where the gap Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
flag or the insert intention flag is not set, stronger or equal to mode. to precise_mode. */
Note that locks on the supremum of a page are a special case here, since
they are always gap type locks, even if the gap flag is not set in them. */
UNIV_INLINE UNIV_INLINE
lock_t* lock_t*
lock_rec_has_expl( lock_rec_has_expl(
/*==============*/ /*==============*/
/* out: lock or NULL */ /* out: lock or NULL */
ulint mode, /* in: lock mode */ ulint precise_mode,/* in: LOCK_S or LOCK_X possibly ORed to
LOCK_GAP or LOCK_REC_NOT_GAP,
for a supremum record we always regard this as a gap
type request */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
lock_t* lock; lock_t* lock;
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
ut_ad((mode == LOCK_X) || (mode == LOCK_S)); ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec);
while (lock) { while (lock) {
if (lock->trx == trx if (lock->trx == trx
&& lock_mode_stronger_or_eq(lock_get_mode(lock), mode) && lock_mode_stronger_or_eq(lock_get_mode(lock),
precise_mode & LOCK_MODE_MASK)
&& !lock_get_wait(lock) && !lock_get_wait(lock)
&& !lock_rec_get_insert_intention(lock) && (!lock_rec_get_rec_not_gap(lock)
&& !lock_rec_get_gap(lock)) { || (precise_mode & LOCK_REC_NOT_GAP)
|| page_rec_is_supremum(rec))
&& (!lock_rec_get_gap(lock)
|| (precise_mode & LOCK_GAP)
|| page_rec_is_supremum(rec))
&& (!lock_rec_get_insert_intention(lock))) {
return(lock); return(lock);
} }
@ -1429,7 +1431,7 @@ lock_t*
lock_rec_other_has_expl_req( lock_rec_other_has_expl_req(
/*========================*/ /*========================*/
/* out: lock or NULL */ /* out: lock or NULL */
ulint mode, /* in: lock mode */ ulint mode, /* in: LOCK_S or LOCK_X */
ulint gap, /* in: LOCK_GAP if also gap locks are taken ulint gap, /* in: LOCK_GAP if also gap locks are taken
into account, or 0 if not */ into account, or 0 if not */
ulint wait, /* in: LOCK_WAIT if also waiting locks are ulint wait, /* in: LOCK_WAIT if also waiting locks are
@ -1471,27 +1473,21 @@ lock_t*
lock_rec_other_has_conflicting( lock_rec_other_has_conflicting(
/*===========================*/ /*===========================*/
/* out: lock or NULL */ /* out: lock or NULL */
ulint mode, /* in: lock mode of the lock we are going to reserve */ ulint mode, /* in: LOCK_S or LOCK_X,
ulint gap, /* in: LOCK_GAP if we are going to reserve a gap type possibly ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
lock, else 0 */ LOCK_INSERT_INTENTION */
ulint insert_intention,
/* in: LOCK_INSERT_INTENTION if we are going to
reserve an insert intention lock */
rec_t* rec, /* in: record to look at */ rec_t* rec, /* in: record to look at */
trx_t* trx) /* in: our transaction */ trx_t* trx) /* in: our transaction */
{ {
lock_t* lock; lock_t* lock;
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
ut_ad(mode == LOCK_X || mode == LOCK_S);
ut_ad(gap == 0 || gap == LOCK_GAP);
ut_ad(insert_intention == LOCK_INSERT_INTENTION
|| insert_intention == 0);
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec);
while (lock) { while (lock) {
if (lock_rec_has_to_wait(trx, mode, gap, insert_intention, if (lock_rec_has_to_wait(trx, mode, lock)) {
lock)) {
return(lock); return(lock);
} }
@ -1607,14 +1603,14 @@ lock_rec_create(
page_no = buf_frame_get_page_no(page); page_no = buf_frame_get_page_no(page);
heap_no = rec_get_heap_no(rec); heap_no = rec_get_heap_no(rec);
/* If rec is the supremum record, then we reset the gap bit, as /* If rec is the supremum record, then we reset the gap and
all locks on the supremum are automatically of the gap type, and LOCK_REC_NOT_GAP bits, as all locks on the supremum are
we try to avoid unnecessary memory consumption of a new record lock automatically of the gap type */
struct for a gap type lock */
if (rec == page_get_supremum_rec(page)) { if (rec == page_get_supremum_rec(page)) {
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
type_mode = type_mode & ~LOCK_GAP; type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
} }
/* Make lock bitmap bigger by a safety margin */ /* Make lock bitmap bigger by a safety margin */
@ -1666,10 +1662,14 @@ ulint
lock_rec_enqueue_waiting( lock_rec_enqueue_waiting(
/*=====================*/ /*=====================*/
/* out: DB_LOCK_WAIT, DB_DEADLOCK, or /* out: DB_LOCK_WAIT, DB_DEADLOCK, or
DB_QUE_THR_SUSPENDED */ DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
DB_SUCCESS means that there was a deadlock,
but another transaction was chosen as a
victim, and we got the lock immediately:
no need to wait then */
ulint type_mode,/* in: lock mode this transaction is ulint type_mode,/* in: lock mode this transaction is
requesting: LOCK_S or LOCK_X, ORed with requesting: LOCK_S or LOCK_X, possibly ORed
LOCK_GAP if a gap lock is requested, ORed with LOCK_GAP or LOCK_REC_NOT_GAP, ORed
with LOCK_INSERT_INTENTION if this waiting with LOCK_INSERT_INTENTION if this waiting
lock request is set when performing an lock request is set when performing an
insert of an index record */ insert of an index record */
@ -1718,6 +1718,14 @@ index->table_name);
return(DB_DEADLOCK); return(DB_DEADLOCK);
} }
/* If there was a deadlock but we chose another transaction as a
victim, it is possible that we already have the lock now granted! */
if (trx->wait_lock == NULL) {
return(DB_SUCCESS);
}
trx->que_state = TRX_QUE_LOCK_WAIT; trx->que_state = TRX_QUE_LOCK_WAIT;
trx->wait_started = time(NULL); trx->wait_started = time(NULL);
@ -1744,8 +1752,8 @@ lock_rec_add_to_queue(
/*==================*/ /*==================*/
/* out: lock where the bit was set, NULL if out /* out: lock where the bit was set, NULL if out
of memory */ of memory */
ulint type_mode,/* in: lock mode, wait, and gap flags; type ulint type_mode,/* in: lock mode, wait, gap etc. flags;
is ignored and replaced by LOCK_REC */ type is ignored and replaced by LOCK_REC */
rec_t* rec, /* in: record on page */ rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
@ -1759,12 +1767,11 @@ lock_rec_add_to_queue(
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
|| ((type_mode & LOCK_MODE_MASK) != LOCK_S) || ((type_mode & LOCK_MODE_MASK) != LOCK_S)
|| !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, rec, trx));
rec, trx));
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
|| ((type_mode & LOCK_MODE_MASK) != LOCK_X) || ((type_mode & LOCK_MODE_MASK) != LOCK_X)
|| !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, rec, trx));
rec, trx));
type_mode = type_mode | LOCK_REC; type_mode = type_mode | LOCK_REC;
page = buf_frame_align(rec); page = buf_frame_align(rec);
@ -1775,12 +1782,15 @@ lock_rec_add_to_queue(
struct for a gap type lock */ struct for a gap type lock */
if (rec == page_get_supremum_rec(page)) { if (rec == page_get_supremum_rec(page)) {
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
type_mode = type_mode & ~LOCK_GAP; /* There should never be LOCK_REC_NOT_GAP on a supremum
record, but let us play safe */
type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
} }
/* Look for a waiting lock request on the same record, or for a /* Look for a waiting lock request on the same record or on a gap */
similar record lock on the same page */
heap_no = rec_get_heap_no(rec); heap_no = rec_get_heap_no(rec);
lock = lock_rec_get_first_on_page(rec); lock = lock_rec_get_first_on_page(rec);
@ -1795,6 +1805,9 @@ lock_rec_add_to_queue(
lock = lock_rec_get_next_on_page(lock); lock = lock_rec_get_next_on_page(lock);
} }
/* Look for a similar record lock on the same page: if one is found
and there are no waiting lock requests, we can just set the bit */
similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx); similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx);
if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) { if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {
@ -1822,7 +1835,8 @@ lock_rec_lock_fast(
ibool impl, /* in: if TRUE, no lock is set if no wait ibool impl, /* in: if TRUE, no lock is set if no wait
is necessary: we assume that the caller will is necessary: we assume that the caller will
set an implicit lock */ set an implicit lock */
ulint mode, /* in: lock mode */ ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
@ -1831,7 +1845,15 @@ lock_rec_lock_fast(
ulint heap_no; ulint heap_no;
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
ut_ad((mode == LOCK_X) || (mode == LOCK_S)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
heap_no = rec_get_heap_no(rec); heap_no = rec_get_heap_no(rec);
@ -1877,7 +1899,8 @@ lock_rec_lock_slow(
ibool impl, /* in: if TRUE, no lock is set if no wait is ibool impl, /* in: if TRUE, no lock is set if no wait is
necessary: we assume that the caller will set necessary: we assume that the caller will set
an implicit lock */ an implicit lock */
ulint mode, /* in: lock mode */ ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
@ -1886,20 +1909,24 @@ lock_rec_lock_slow(
ulint err; ulint err;
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
ut_ad((mode == LOCK_X) || (mode == LOCK_S)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
ut_ad((mode != LOCK_S) || lock_table_has(trx, index->table,
LOCK_IS));
ut_ad((mode != LOCK_X) || lock_table_has(trx, index->table,
LOCK_IX));
if (lock_rec_has_expl(mode, rec, trx)) { if (lock_rec_has_expl(mode, rec, trx)) {
/* The trx already has a strong enough lock on rec: do /* The trx already has a strong enough lock on rec: do
nothing */ nothing */
err = DB_SUCCESS; err = DB_SUCCESS;
} else if (lock_rec_other_has_conflicting(mode, 0, 0, rec, trx)) { } else if (lock_rec_other_has_conflicting(mode, rec, trx)) {
/* If another transaction has a non-gap conflicting request in /* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong the queue, as this transaction does not have a lock strong
@ -1935,7 +1962,8 @@ lock_rec_lock(
ibool impl, /* in: if TRUE, no lock is set if no wait is ibool impl, /* in: if TRUE, no lock is set if no wait is
necessary: we assume that the caller will set necessary: we assume that the caller will set
an implicit lock */ an implicit lock */
ulint mode, /* in: lock mode */ ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
@ -1943,10 +1971,15 @@ lock_rec_lock(
ulint err; ulint err;
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
ut_ad((mode != LOCK_S) || lock_table_has(thr_get_trx(thr), ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
index->table, LOCK_IS)); || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((mode != LOCK_X) || lock_table_has(thr_get_trx(thr), ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
index->table, LOCK_IX)); || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0);
if (lock_rec_lock_fast(impl, mode, rec, index, thr)) { if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
@ -2030,7 +2063,14 @@ lock_grant(
ut_dulint_get_low(lock->trx->id)); ut_dulint_get_low(lock->trx->id));
} }
/* If we are resolving a deadlock by choosing another transaction
as a victim, then our original transaction may not be in the
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
for it */
if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
trx_end_lock_wait(lock->trx); trx_end_lock_wait(lock->trx);
}
} }
/***************************************************************** /*****************************************************************
@ -2199,9 +2239,10 @@ lock_rec_reset_and_release_wait(
} }
/***************************************************************** /*****************************************************************
Makes a record to inherit the locks of another record as gap type locks, but Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
does not reset the lock bits of the other record. Also waiting lock requests of another record as gap type locks, but does not reset the lock bits of
on rec are inherited as GRANTED gap locks. */ the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
void void
lock_rec_inherit_to_gap( lock_rec_inherit_to_gap(
@ -2217,9 +2258,45 @@ lock_rec_inherit_to_gap(
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec);
while (lock != NULL) { while (lock != NULL) {
lock_rec_add_to_queue(((lock->type_mode | LOCK_GAP) if (!lock_rec_get_insert_intention(lock)) {
& ~LOCK_WAIT),
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP,
heir, lock->index, lock->trx); heir, lock->index, lock->trx);
}
lock = lock_rec_get_next(rec, lock);
}
}
/*****************************************************************
Makes a record inherit the gap locks (except those of LOCK_INSERT_INTENTION
type) of another record as gap type locks, but does not reset the lock bits of
the other record. Waiting lock requests are also inherited as GRANTED gap
locks. */
void
lock_rec_inherit_to_gap_if_gap_lock(
/*================================*/
rec_t* heir, /* in: record which inherits */
rec_t* rec) /* in: record from which inherited; does NOT reset
the locks on this record */
{
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
lock = lock_rec_get_first(rec);
while (lock != NULL) {
if (!lock_rec_get_insert_intention(lock)
&& (page_rec_is_supremum(rec)
|| !lock_rec_get_rec_not_gap(lock))) {
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP,
heir, lock->index, lock->trx);
}
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(rec, lock);
} }
} }
@ -2778,9 +2855,10 @@ lock_update_insert(
{ {
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
/* Inherit the locks for rec, in gap mode, from the next record */ /* Inherit the gap-locking locks for rec, in gap mode, from the next
record */
lock_rec_inherit_to_gap(rec, page_rec_get_next(rec)); lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec));
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
} }
@ -2859,20 +2937,23 @@ static
ibool ibool
lock_deadlock_occurs( lock_deadlock_occurs(
/*=================*/ /*=================*/
/* out: TRUE if a deadlock was detected */ /* out: TRUE if a deadlock was detected and we
chose trx as a victim; FALSE if no deadlock, or
there was a deadlock, but we chose other
transaction(s) as victim(s) */
lock_t* lock, /* in: lock the transaction is requesting */ lock_t* lock, /* in: lock the transaction is requesting */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
dict_table_t* table; dict_table_t* table;
dict_index_t* index; dict_index_t* index;
trx_t* mark_trx; trx_t* mark_trx;
ibool ret; ulint ret;
ulint cost = 0; ulint cost = 0;
char* err_buf; char* err_buf;
ut_ad(trx && lock); ut_ad(trx && lock);
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
retry:
/* We check that adding this trx to the waits-for graph /* We check that adding this trx to the waits-for graph
does not produce a cycle. First mark all active transactions does not produce a cycle. First mark all active transactions
with 0: */ with 0: */
@ -2886,7 +2967,14 @@ lock_deadlock_occurs(
ret = lock_deadlock_recursive(trx, trx, lock, &cost); ret = lock_deadlock_recursive(trx, trx, lock, &cost);
if (ret) { if (ret == LOCK_VICTIM_IS_OTHER) {
/* We chose some other trx as a victim: retry if there still
is a deadlock */
goto retry;
}
if (ret == LOCK_VICTIM_IS_START) {
if (lock_get_type(lock) == LOCK_TABLE) { if (lock_get_type(lock) == LOCK_TABLE) {
table = lock->un_member.tab_lock.table; table = lock->un_member.tab_lock.table;
index = NULL; index = NULL;
@ -2899,19 +2987,6 @@ lock_deadlock_occurs(
err_buf = lock_latest_err_buf + strlen(lock_latest_err_buf); err_buf = lock_latest_err_buf + strlen(lock_latest_err_buf);
err_buf += sprintf(err_buf,
"*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
ut_a(err_buf <= lock_latest_err_buf + 4000);
if (lock_get_type(lock) == LOCK_REC) {
lock_rec_print(err_buf, lock);
err_buf += strlen(err_buf);
} else {
lock_table_print(err_buf, lock);
err_buf += strlen(err_buf);
}
ut_a(err_buf <= lock_latest_err_buf + 4000); ut_a(err_buf <= lock_latest_err_buf + 4000);
err_buf += sprintf(err_buf, err_buf += sprintf(err_buf,
@ -2923,30 +2998,39 @@ lock_deadlock_occurs(
sess_raise_error_low(trx, DB_DEADLOCK, lock->type_mode, table, sess_raise_error_low(trx, DB_DEADLOCK, lock->type_mode, table,
index, NULL, NULL, NULL); index, NULL, NULL, NULL);
*/ */
return(TRUE);
} }
return(ret); return(FALSE);
} }
/************************************************************************ /************************************************************************
Looks recursively for a deadlock. */ Looks recursively for a deadlock. */
static static
ibool ulint
lock_deadlock_recursive( lock_deadlock_recursive(
/*====================*/ /*====================*/
/* out: TRUE if a deadlock was detected /* out: 0 if no deadlock found,
or the calculation took too long */ LOCK_VICTIM_IS_START if there was a deadlock
and we chose 'start' as the victim,
LOCK_VICTIM_IS_OTHER if a deadlock
was found and we chose some other trx as a
victim: we must do the search again in this
last case because there may be another
deadlock! */
trx_t* start, /* in: recursion starting point */ trx_t* start, /* in: recursion starting point */
trx_t* trx, /* in: a transaction waiting for a lock */ trx_t* trx, /* in: a transaction waiting for a lock */
lock_t* wait_lock, /* in: the lock trx is waiting to be granted */ lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
ulint* cost) /* in/out: number of calculation steps thus ulint* cost) /* in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_... far: if this exceeds LOCK_MAX_N_STEPS_...
we return TRUE */ we return LOCK_VICTIM_IS_START */
{ {
lock_t* lock; lock_t* lock;
ulint bit_no; ulint bit_no;
trx_t* lock_trx; trx_t* lock_trx;
char* err_buf; char* err_buf;
ulint ret;
ut_a(trx && start && wait_lock); ut_a(trx && start && wait_lock);
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
@ -2955,14 +3039,14 @@ lock_deadlock_recursive(
/* We have already exhaustively searched the subtree starting /* We have already exhaustively searched the subtree starting
from this trx */ from this trx */
return(FALSE); return(0);
} }
*cost = *cost + 1; *cost = *cost + 1;
if (*cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK) { if (*cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK) {
return(TRUE); return(LOCK_VICTIM_IS_START);
} }
lock = wait_lock; lock = wait_lock;
@ -2998,6 +3082,9 @@ lock_deadlock_recursive(
lock_trx = lock->trx; lock_trx = lock->trx;
if (lock_trx == start) { if (lock_trx == start) {
/* We came back to the recursion starting
point: a deadlock detected */
err_buf = lock_latest_err_buf; err_buf = lock_latest_err_buf;
ut_sprintf_timestamp(err_buf); ut_sprintf_timestamp(err_buf);
@ -3045,11 +3132,59 @@ lock_deadlock_recursive(
ut_a(err_buf <= lock_latest_err_buf + 4000); ut_a(err_buf <= lock_latest_err_buf + 4000);
err_buf += sprintf(err_buf,
"*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
ut_a(err_buf <= lock_latest_err_buf + 4000);
if (lock_get_type(start->wait_lock)
== LOCK_REC) {
lock_rec_print(err_buf,
start->wait_lock);
err_buf += strlen(err_buf);
} else {
lock_table_print(err_buf,
start->wait_lock);
err_buf += strlen(err_buf);
}
if (lock_print_waits) { if (lock_print_waits) {
printf("Deadlock detected\n"); printf("Deadlock detected\n");
} }
return(TRUE); if (ut_dulint_cmp(wait_lock->trx->undo_no,
start->undo_no) >= 0) {
/* Our recursion starting point
transaction is 'smaller', so let us
choose 'start' as the victim and roll
it back */
return(LOCK_VICTIM_IS_START);
}
lock_deadlock_found = TRUE;
ut_a(err_buf <= lock_latest_err_buf + 4000);
/* Let us choose the transaction of wait_lock
as a victim to try to avoid deadlocking our
recursion starting point transaction */
err_buf += sprintf(err_buf,
"*** WE ROLL BACK TRANSACTION (1)\n");
wait_lock->trx->error_state = DB_DEADLOCK;
lock_cancel_waiting_and_release(wait_lock);
/* Since trx and wait_lock are no longer
in the waits-for graph, we return
LOCK_VICTIM_IS_OTHER so that the caller
repeats the search; note that our selective
algorithm can choose several transactions as
victims, but still we may end up rolling back
also the recursion starting point transaction! */
return(LOCK_VICTIM_IS_OTHER);
} }
if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) { if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
@ -3058,10 +3193,11 @@ lock_deadlock_recursive(
incompatible mode, and is itself waiting for incompatible mode, and is itself waiting for
a lock */ a lock */
if (lock_deadlock_recursive(start, lock_trx, ret = lock_deadlock_recursive(start, lock_trx,
lock_trx->wait_lock, cost)) { lock_trx->wait_lock, cost);
if (ret != 0) {
return(TRUE); return(ret);
} }
} }
} }
@ -3153,12 +3289,16 @@ lock_table_remove_low(
/************************************************************************* /*************************************************************************
Enqueues a waiting request for a table lock which cannot be granted Enqueues a waiting request for a table lock which cannot be granted
immediately. Checks for deadlocks. */ immediately. Checks for deadlocks. */
static
ulint ulint
lock_table_enqueue_waiting( lock_table_enqueue_waiting(
/*=======================*/ /*=======================*/
/* out: DB_LOCK_WAIT, DB_DEADLOCK, or /* out: DB_LOCK_WAIT, DB_DEADLOCK, or
DB_QUE_THR_SUSPENDED */ DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
DB_SUCCESS means that there was a deadlock,
but another transaction was chosen as a
victim, and we got the lock immediately:
no need to wait then */
ulint mode, /* in: lock mode this transaction is ulint mode, /* in: lock mode this transaction is
requesting */ requesting */
dict_table_t* table, /* in: table */ dict_table_t* table, /* in: table */
@ -3205,6 +3345,13 @@ table->name);
return(DB_DEADLOCK); return(DB_DEADLOCK);
} }
if (trx->wait_lock == NULL) {
/* Deadlock resolution chose another transaction as a victim,
and we accidentally got our lock granted! */
return(DB_SUCCESS);
}
trx->que_state = TRX_QUE_LOCK_WAIT; trx->que_state = TRX_QUE_LOCK_WAIT;
trx->wait_started = time(NULL); trx->wait_started = time(NULL);
@ -3292,7 +3439,7 @@ lock_table(
if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
/* Another trx has a request on the table in an incompatible /* Another trx has a request on the table in an incompatible
mode: this trx must wait */ mode: this trx may have to wait */
err = lock_table_enqueue_waiting(mode, table, thr); err = lock_table_enqueue_waiting(mode, table, thr);
@ -3659,7 +3806,11 @@ lock_rec_print(
} }
if (lock_rec_get_gap(lock)) { if (lock_rec_get_gap(lock)) {
buf += sprintf(buf, " gap type lock"); buf += sprintf(buf, " locks gap before rec");
}
if (lock_rec_get_rec_not_gap(lock)) {
buf += sprintf(buf, " locks rec but not gap");
} }
if (lock_rec_get_insert_intention(lock)) { if (lock_rec_get_insert_intention(lock)) {
@ -4080,7 +4231,8 @@ lock_rec_queue_validate(
if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
LOCK_WAIT, rec, impl_trx)) { LOCK_WAIT, rec, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X, rec, impl_trx)); ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx));
} }
} }
@ -4095,7 +4247,8 @@ lock_rec_queue_validate(
if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
LOCK_WAIT, rec, impl_trx)) { LOCK_WAIT, rec, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X, rec, impl_trx)); ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx));
} }
} }
@ -4359,8 +4512,8 @@ lock_rec_insert_check_and_lock(
*inherit = TRUE; *inherit = TRUE;
/* If another transaction has an explicit lock request, gap or not, /* If another transaction has an explicit lock request which locks
waiting or granted, on the successor, the insert has to wait. the gap, waiting or granted, on the successor, the insert has to wait.
An exception is the case where the lock by the another transaction An exception is the case where the lock by the another transaction
is a gap type lock which it placed to wait for its turn to insert. We is a gap type lock which it placed to wait for its turn to insert. We
@ -4369,8 +4522,10 @@ lock_rec_insert_check_and_lock(
had to wait for their insert. Both had waiting gap type lock requests had to wait for their insert. Both had waiting gap type lock requests
on the successor, which produced an unnecessary deadlock. */ on the successor, which produced an unnecessary deadlock. */
if (lock_rec_other_has_conflicting(LOCK_X, LOCK_GAP, if (lock_rec_other_has_conflicting(LOCK_X | LOCK_GAP
LOCK_INSERT_INTENTION, next_rec, trx)) { | LOCK_INSERT_INTENTION, next_rec, trx)) {
/* Note that we may get DB_SUCCESS also here! */
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION, | LOCK_INSERT_INTENTION,
next_rec, index, thr); next_rec, index, thr);
@ -4418,9 +4573,11 @@ lock_rec_convert_impl_to_expl(
/* If the transaction has no explicit x-lock set on the /* If the transaction has no explicit x-lock set on the
record, set one for it */ record, set one for it */
if (!lock_rec_has_expl(LOCK_X, rec, impl_trx)) { if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx)) {
lock_rec_add_to_queue(LOCK_REC | LOCK_X, rec, index, lock_rec_add_to_queue(LOCK_REC | LOCK_X
| LOCK_REC_NOT_GAP, rec, index,
impl_trx); impl_trx);
} }
} }
@ -4466,7 +4623,7 @@ lock_clust_rec_modify_check_and_lock(
lock_rec_convert_impl_to_expl(rec, index); lock_rec_convert_impl_to_expl(rec, index);
err = lock_rec_lock(TRUE, LOCK_X, rec, index, thr); err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
@ -4511,7 +4668,7 @@ lock_sec_rec_modify_check_and_lock(
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
err = lock_rec_lock(TRUE, LOCK_X, rec, index, thr); err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
@ -4545,6 +4702,8 @@ lock_sec_rec_read_check_and_lock(
ulint mode, /* in: mode of the lock which the read cursor ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */ latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
{ {
ulint err; ulint err;
@ -4576,7 +4735,7 @@ lock_sec_rec_read_check_and_lock(
lock_rec_convert_impl_to_expl(rec, index); lock_rec_convert_impl_to_expl(rec, index);
} }
err = lock_rec_lock(FALSE, mode, rec, index, thr); err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
@ -4607,13 +4766,16 @@ lock_clust_rec_read_check_and_lock(
ulint mode, /* in: mode of the lock which the read cursor ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */ latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
{ {
ulint err; ulint err;
ut_ad(index->type & DICT_CLUSTERED); ut_ad(index->type & DICT_CLUSTERED);
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
|| gap_mode == LOCK_REC_NOT_GAP);
if (flags & BTR_NO_LOCKING_FLAG) { if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS); return(DB_SUCCESS);
@ -4631,7 +4793,7 @@ lock_clust_rec_read_check_and_lock(
lock_rec_convert_impl_to_expl(rec, index); lock_rec_convert_impl_to_expl(rec, index);
} }
err = lock_rec_lock(FALSE, mode, rec, index, thr); err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();

View File

@ -350,6 +350,16 @@ mem_hash_remove(
node->nth_heap); node->nth_heap);
printf("in %s line %lu and tried to free in %s line %lu.\n", printf("in %s line %lu and tried to free in %s line %lu.\n",
node->file_name, node->line, file_name, line); node->file_name, node->line, file_name, line);
printf(
"Hex dump of 400 bytes around memory heap first block start:\n");
ut_print_buf((byte*)(node->heap) - 200, 400);
printf("\nDump of the mem heap:\n");
mem_heap_validate_or_print(node->heap, NULL, TRUE, &error, &size,
NULL, NULL);
ut_error; ut_error;
} }

View File

@ -148,7 +148,7 @@ Gets the operating system version. Currently works only on Windows. */
ulint ulint
os_get_os_version(void) os_get_os_version(void)
/*===================*/ /*===================*/
/* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */ /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
{ {
#ifdef __WIN__ #ifdef __WIN__
OSVERSIONINFO os_info; OSVERSIONINFO os_info;
@ -162,7 +162,11 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95); return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) {
return(OS_WINNT); return(OS_WINNT);
} else {
return(OS_WIN2000);
}
} else { } else {
ut_error; ut_error;
return(0); return(0);
@ -268,9 +272,7 @@ os_file_get_last_error(void)
} }
/******************************************************************** /********************************************************************
Does error handling when a file operation fails. If we have run out Does error handling when a file operation fails. */
of disk space, then the user can clean the disk. If we do not find
a specified file, then the user can copy it to disk. */
static static
ibool ibool
os_file_handle_error( os_file_handle_error(
@ -503,7 +505,11 @@ try_again:
value 2 denotes that we do not flush the log at every value 2 denotes that we do not flush the log at every
commit, but only once per second */ commit, but only once per second */
} else { } else {
attributes = attributes | FILE_FLAG_NO_BUFFERING; if (srv_win_file_flush_method ==
SRV_WIN_IO_UNBUFFERED) {
attributes = attributes
| FILE_FLAG_NO_BUFFERING;
}
} }
#endif #endif
} else if (purpose == OS_FILE_NORMAL) { } else if (purpose == OS_FILE_NORMAL) {
@ -514,7 +520,11 @@ try_again:
value 2 denotes that we do not flush the log at every value 2 denotes that we do not flush the log at every
commit, but only once per second */ commit, but only once per second */
} else { } else {
attributes = attributes | FILE_FLAG_NO_BUFFERING; if (srv_win_file_flush_method ==
SRV_WIN_IO_UNBUFFERED) {
attributes = attributes
| FILE_FLAG_NO_BUFFERING;
}
} }
#endif #endif
} else { } else {
@ -1752,6 +1762,7 @@ os_aio(
os_aio_array_t* array; os_aio_array_t* array;
os_aio_slot_t* slot; os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
ibool retval;
BOOL ret = TRUE; BOOL ret = TRUE;
DWORD len = n; DWORD len = n;
void* dummy_mess1; void* dummy_mess1;
@ -1824,6 +1835,8 @@ try_again:
if (os_aio_use_native_aio) { if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_n_file_reads++; os_n_file_reads++;
os_bytes_read_since_printout += len;
ret = ReadFile(file, buf, (DWORD)n, &len, ret = ReadFile(file, buf, (DWORD)n, &len,
&(slot->control)); &(slot->control));
#elif defined(POSIX_ASYNC_IO) #elif defined(POSIX_ASYNC_IO)
@ -1870,10 +1883,12 @@ try_again:
where we also use async i/o: in Windows we must where we also use async i/o: in Windows we must
use the same wait mechanism as for async i/o */ use the same wait mechanism as for async i/o */
return(os_aio_windows_handle(ULINT_UNDEFINED, retval = os_aio_windows_handle(ULINT_UNDEFINED,
slot->pos, slot->pos,
&dummy_mess1, &dummy_mess2, &dummy_mess1, &dummy_mess2,
&dummy_type)); &dummy_type);
return(retval);
} }
return(TRUE); return(TRUE);
@ -1897,8 +1912,6 @@ try_again:
goto try_again; goto try_again;
} }
ut_error;
return(FALSE); return(FALSE);
} }
@ -1958,14 +1971,14 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments; n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) { if (array == os_aio_sync_array) {
srv_io_thread_op_info[orig_seg] = "wait windows aio for 1 page"; srv_io_thread_op_info[orig_seg] = "wait Windows aio for 1 page";
ut_ad(pos < array->n_slots); ut_ad(pos < array->n_slots);
os_event_wait(array->events[pos]); os_event_wait(array->events[pos]);
i = pos; i = pos;
} else { } else {
srv_io_thread_op_info[orig_seg] = srv_io_thread_op_info[orig_seg] =
"wait windows aio for n pages"; "wait Windows aio";
i = os_event_wait_multiple(n, (array->events) + segment * n); i = os_event_wait_multiple(n, (array->events) + segment * n);
} }
@ -1991,9 +2004,7 @@ os_aio_windows_handle(
ut_a(TRUE == os_file_flush(slot->file)); ut_a(TRUE == os_file_flush(slot->file));
} }
} else { } else {
os_file_get_last_error(); os_file_handle_error(slot->file, slot->name);
ut_error;
ret_val = FALSE; ret_val = FALSE;
} }

View File

@ -18,6 +18,23 @@ Created 9/30/1995 Heikki Tuuri
#include "ut0mem.h" #include "ut0mem.h"
/********************************************************************
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
thread. That number is the same as one sees in 'top', for example. In Linux
the thread id is not the same as one sees in 'top'. */
ulint
os_proc_get_number(void)
/*====================*/
{
#ifdef __WIN__
return((ulint)GetCurrentProcessId());
#else
return((ulint)getpid());
#endif
}
/******************************************************************** /********************************************************************
Allocates non-cacheable memory. */ Allocates non-cacheable memory. */

View File

@ -169,7 +169,7 @@ page_cur_search_with_match(
ut_ad(dtuple_check_typed(tuple)); ut_ad(dtuple_check_typed(tuple));
ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE) ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE)
|| (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)
|| (mode == PAGE_CUR_DBG)); || (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG));
#ifdef PAGE_CUR_ADAPT #ifdef PAGE_CUR_ADAPT
if ((page_header_get_field(page, PAGE_LEVEL) == 0) if ((page_header_get_field(page, PAGE_LEVEL) == 0)
@ -232,9 +232,26 @@ page_cur_search_with_match(
low_matched_bytes = cur_matched_bytes; low_matched_bytes = cur_matched_bytes;
} else if (cmp == -1) { } else if (cmp == -1) {
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& dfield_get_len(dtuple_get_nth_field(tuple,
cur_matched_fields))
== cur_matched_bytes
&& rec_get_nth_field_len(mid_rec,
cur_matched_fields)
!= UNIV_SQL_NULL) {
/* This means current dfield is not SQL
NULL, and the current rec field extends it */
low = mid;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
} else {
up = mid; up = mid;
up_matched_fields = cur_matched_fields; up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes; up_matched_bytes = cur_matched_bytes;
}
} else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) { } else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) {
low = mid; low = mid;
@ -252,8 +269,8 @@ page_cur_search_with_match(
slot = page_dir_get_nth_slot(page, up); slot = page_dir_get_nth_slot(page, up);
up_rec = page_dir_slot_get_rec(slot); up_rec = page_dir_slot_get_rec(slot);
/* Perform linear search until the upper and lower records /* Perform linear search until the upper and lower records come to
come to distance 1 of each other. */ distance 1 of each other. */
while (page_rec_get_next(low_rec) != up_rec) { while (page_rec_get_next(low_rec) != up_rec) {
@ -272,10 +289,25 @@ page_cur_search_with_match(
low_matched_bytes = cur_matched_bytes; low_matched_bytes = cur_matched_bytes;
} else if (cmp == -1) { } else if (cmp == -1) {
if (mode == PAGE_CUR_LE_OR_EXTENDS
&& dfield_get_len(dtuple_get_nth_field(tuple,
cur_matched_fields))
== cur_matched_bytes
&& rec_get_nth_field_len(mid_rec,
cur_matched_fields)
!= UNIV_SQL_NULL) {
/* This means current dfield is not SQL
NULL, and the current rec field extends it */
low = mid;
low_matched_fields = cur_matched_fields;
low_matched_bytes = cur_matched_bytes;
} else {
up_rec = mid_rec; up_rec = mid_rec;
up_matched_fields = cur_matched_fields; up_matched_fields = cur_matched_fields;
up_matched_bytes = cur_matched_bytes; up_matched_bytes = cur_matched_bytes;
}
} else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) { } else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) {
low_rec = mid_rec; low_rec = mid_rec;
low_matched_fields = cur_matched_fields; low_matched_fields = cur_matched_fields;

View File

@ -1312,6 +1312,194 @@ page_rec_validate(
return(TRUE); return(TRUE);
} }
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage.

The checks are, in order:
(1) the number of directory slots is sensible and the record heap does not
    overlap the page directory;
(2) the record list, walked from the record before the first one to the
    last one, stays below the heap top, has sane next-record offsets, is
    not circular, and agrees with the page directory (owned counts and
    slot pointers);
(3) the number of records agrees with PAGE_N_RECS;
(4) the free list stays inside the page and below the heap top and is not
    circular;
(5) the list record count plus the free list record count agrees with
    PAGE_N_HEAP.
Every failure prints a diagnostic line to stderr. */

ibool
page_simple_validate(
/*=================*/
			/* out: TRUE if ok */
	page_t*	page)	/* in: index page */
{
	page_cur_t		cur;
	page_dir_slot_t*	slot;
	ulint			slot_no;
	ulint			n_slots;
	rec_t*			rec;
	byte*			rec_heap_top;
	ulint			count;
	ulint			own_count;
	ibool			ret	= FALSE;

	/* Check first that the record heap and the directory do not
	overlap. A slot count of zero is rejected here too: n_slots is
	unsigned, so n_slots - 1 below would wrap around to a huge value
	and address a directory slot far outside the page. */

	n_slots = page_dir_get_n_slots(page);

	if (n_slots == 0 || n_slots > UNIV_PAGE_SIZE / 4) {
		fprintf(stderr,
		"Nonsensical number %lu of page dir slots\n", n_slots);

		goto func_exit;
	}

	rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);

	if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) {

		fprintf(stderr,
	"Record heap and dir overlap on a page, heap top %lu, dir %lu\n",
		(ulint)(page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
		(ulint)(page_dir_get_nth_slot(page, n_slots - 1) - page));

		goto func_exit;
	}

	/* Validate the record list in a loop checking also that it is
	consistent with the page record directory. */

	count = 0;
	own_count = 1;
	slot_no = 0;
	slot = page_dir_get_nth_slot(page, slot_no);

	page_cur_set_before_first(page, &cur);

	for (;;) {
		rec = cur.rec;

		if (rec > rec_heap_top) {
			fprintf(stderr,
			"Record %lu is above rec heap top %lu\n",
			(ulint)(rec - page), (ulint)(rec_heap_top - page));

			goto func_exit;
		}

		if (rec_get_n_owned(rec) != 0) {
			/* This is a record pointed to by a dir slot */
			if (rec_get_n_owned(rec) != own_count) {
				fprintf(stderr,
				"Wrong owned count %lu, %lu, rec %lu\n",
				rec_get_n_owned(rec), own_count,
				(ulint)(rec - page));

				goto func_exit;
			}

			if (page_dir_slot_get_rec(slot) != rec) {
				fprintf(stderr,
				"Dir slot does not point to right rec %lu\n",
				(ulint)(rec - page));

				goto func_exit;
			}

			own_count = 0;

			if (!page_cur_is_after_last(&cur)) {
				/* On a garbage page there may be more
				owner records than directory slots: never
				step past the last slot of the directory */

				if (slot_no + 1 >= n_slots) {
					fprintf(stderr,
		"Too many owner records for %lu page dir slots, rec %lu\n",
					n_slots, (ulint)(rec - page));

					goto func_exit;
				}

				slot_no++;
				slot = page_dir_get_nth_slot(page, slot_no);
			}
		}

		if (page_cur_is_after_last(&cur)) {

			break;
		}

		/* Check the next-record offset BEFORE following it, so
		that the cursor is never moved outside the page */

		if (rec_get_next_offs(rec) < FIL_PAGE_DATA
		    || rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Next record offset nonsensical %lu for rec %lu\n",
			rec_get_next_offs(rec),
			(ulint)(rec - page));

			goto func_exit;
		}

		count++;

		/* More steps than there are bytes on the page means that
		the list must loop back onto itself */

		if (count > UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Page record list appears to be circular %lu\n",
			count);

			goto func_exit;
		}

		page_cur_move_to_next(&cur);
		own_count++;
	}

	/* Here rec is the last record of the list (the supremum) */

	if (rec_get_n_owned(rec) == 0) {
		fprintf(stderr, "n owned is zero in a supremum rec\n");

		goto func_exit;
	}

	if (slot_no != n_slots - 1) {
		fprintf(stderr, "n slots wrong %lu, %lu\n",
			slot_no, n_slots - 1);

		goto func_exit;
	}

	/* count is the number of cursor moves, i.e., one less than the
	number of records visited */

	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
		fprintf(stderr, "n recs wrong %lu %lu\n",
		page_header_get_field(page, PAGE_N_RECS) + 2, count + 1);

		goto func_exit;
	}

	/* Check then the free list */
	rec = page_header_get_ptr(page, PAGE_FREE);

	while (rec != NULL) {
		if (rec < page + FIL_PAGE_DATA
		    || rec >= page + UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Free list record has a nonsensical offset %lu\n",
			(ulint)(rec - page));

			goto func_exit;
		}

		if (rec > rec_heap_top) {
			fprintf(stderr,
			"Free list record %lu is above rec heap top %lu\n",
			(ulint)(rec - page), (ulint)(rec_heap_top - page));

			goto func_exit;
		}

		/* count keeps accumulating: list records + free records */
		count++;

		if (count > UNIV_PAGE_SIZE) {
			fprintf(stderr,
			"Page free list appears to be circular %lu\n",
			count);

			goto func_exit;
		}

		rec = page_rec_get_next(rec);
	}

	if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) {
		fprintf(stderr, "N heap is wrong %lu, %lu\n",
		page_header_get_field(page, PAGE_N_HEAP), count + 1);

		goto func_exit;
	}

	ret = TRUE;

func_exit:
	return(ret);
}
/******************************************************************* /*******************************************************************
This function checks the consistency of an index page. */ This function checks the consistency of an index page. */
@ -1339,6 +1527,14 @@ page_validate(
ulint i; ulint i;
char err_buf[1000]; char err_buf[1000];
if (!page_simple_validate(page)) {
buf_page_print(page);
fprintf(stderr, "Apparent corruption in a page in index %s\n",
index->name);
return(FALSE);
}
heap = mem_heap_create(UNIV_PAGE_SIZE); heap = mem_heap_create(UNIV_PAGE_SIZE);
/* The following buffer is used to check that the /* The following buffer is used to check that the

View File

@ -4,8 +4,6 @@
* $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $ * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $
*/ */
#include "univ.i"
#define FLEX_SCANNER #define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 5 #define YY_FLEX_MINOR_VERSION 5
@ -609,18 +607,13 @@ How to make the InnoDB parser and lexer C files:
6. Remove the #include of unistd.h from about line 2500 of lexyy.c 6. Remove the #include of unistd.h from about line 2500 of lexyy.c
7. Move #include <math.h> in pars0grm.c after #include "univ.i" to remove
a large file compilation error on AIX.
8. Move #include "univ.i" in lexyy.c to the file start to remove a large
file compilation error on AIX.
These instructions seem to work at least with bison-1.28 and flex-2.5.4 on These instructions seem to work at least with bison-1.28 and flex-2.5.4 on
Linux. Linux.
*******************************************************/ *******************************************************/
#line 36 "pars0lex.l" #line 36 "pars0lex.l"
#define YYSTYPE que_node_t* #define YYSTYPE que_node_t*
#include "univ.i"
#include "pars0pars.h" #include "pars0pars.h"
#include "pars0grm.h" #include "pars0grm.h"
#include "pars0sym.h" #include "pars0sym.h"

View File

@ -102,8 +102,6 @@ que_node_t */
#include "que0que.h" #include "que0que.h"
#include "row0sel.h" #include "row0sel.h"
#include <math.h>
#define YYSTYPE que_node_t* #define YYSTYPE que_node_t*
/* #define __STDC__ */ /* #define __STDC__ */

View File

@ -200,6 +200,28 @@ read_view_close(
UT_LIST_REMOVE(view_list, trx_sys->view_list, view); UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
} }
/*************************************************************************
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED.
The trx must currently have a read view (this is asserted); on return
trx->read_view is NULL. The function acquires and releases kernel_mutex
itself around the whole teardown. */
void
read_view_close_for_mysql(
/*======================*/
trx_t* trx) /* in: trx which has a read view */
{
/* Calling this for a trx without a read view is a caller error */
ut_a(trx->read_view);
mutex_enter(&kernel_mutex);
/* read_view_close() removes the view from trx_sys->view_list */
read_view_close(trx->read_view);
/* NOTE(review): presumably the view object was allocated from
trx->read_view_heap, so emptying the heap releases its memory -- confirm */
mem_heap_empty(trx->read_view_heap);
/* Clear the pointer while still holding the mutex so that the trx no
longer refers to the closed view */
trx->read_view = NULL;
mutex_exit(&kernel_mutex);
}
/************************************************************************* /*************************************************************************
Prints a read view to stderr. */ Prints a read view to stderr. */

View File

@ -321,59 +321,6 @@ row_ins_clust_index_entry_by_modify(
return(err); return(err);
} }
/*******************************************************************
Tells whether inserting the index entry would violate the uniqueness of
the index because of the given record. A duplicate is reported only if
all the unique fields of the entry match the record, the record is not
delete-marked, and, for a secondary index, none of the unique fields of
the entry is SQL NULL. */
static
ibool
row_ins_dupl_error_with_rec(
/*========================*/
				/* out: TRUE if error */
	rec_t*		rec,	/* in: user record; NOTE that we assume
				that the caller already has a record lock on
				the record! */
	dtuple_t*	entry,	/* in: entry to insert */
	dict_index_t*	index)	/* in: index */
{
	ulint	n_matched_fields	= 0;
	ulint	n_matched_bytes		= 0;
	ulint	n_uniq;
	ulint	field_no;

	n_uniq = dict_index_get_n_unique(index);

	/* Find out how many leading fields of the entry equal the record */
	cmp_dtuple_rec_with_match(entry, rec, &n_matched_fields,
							&n_matched_bytes);

	if (n_matched_fields < n_uniq) {
		/* The unique prefix differs: no conflict */

		return(FALSE);
	}

	if ((index->type & DICT_CLUSTERED) == 0) {
		/* A unique secondary index tolerates equal key values
		as long as the key contains an SQL NULL */

		field_no = 0;

		while (field_no < n_uniq) {
			if (dfield_get_len(dtuple_get_nth_field(entry,
					field_no)) == UNIV_SQL_NULL) {

				return(FALSE);
			}

			field_no++;
		}
	}

	/* A delete-marked record does not count as a duplicate */

	return(rec_get_deleted_flag(rec) ? FALSE : TRUE);
}
/************************************************************************* /*************************************************************************
Either deletes or sets the referencing columns SQL NULL in a child row. Either deletes or sets the referencing columns SQL NULL in a child row.
Used in ON DELETE ... clause for foreign keys when a parent row is Used in ON DELETE ... clause for foreign keys when a parent row is
@ -533,8 +480,12 @@ row_ins_foreign_delete_or_set_null(
err = lock_table(0, table, LOCK_IX, thr); err = lock_table(0, table, LOCK_IX, thr);
if (err == DB_SUCCESS) { if (err == DB_SUCCESS) {
/* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
we already have a normal shared lock on the appropriate
gap if the search criterion was not unique */
err = lock_clust_rec_read_check_and_lock(0, clust_rec, err = lock_clust_rec_read_check_and_lock(0, clust_rec,
clust_index, LOCK_X, thr); clust_index, LOCK_X, LOCK_REC_NOT_GAP, thr);
} }
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
@ -630,12 +581,14 @@ nonstandard_exit_func:
/************************************************************************* /*************************************************************************
Sets a shared lock on a record. Used in locking possible duplicate key Sets a shared lock on a record. Used in locking possible duplicate key
records. */ records and also in checking foreign key constraints. */
static static
ulint ulint
row_ins_set_shared_rec_lock( row_ins_set_shared_rec_lock(
/*========================*/ /*========================*/
/* out: DB_SUCCESS or error code */ /* out: DB_SUCCESS or error code */
ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP type lock */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: index */ dict_index_t* index, /* in: index */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
@ -644,10 +597,10 @@ row_ins_set_shared_rec_lock(
if (index->type & DICT_CLUSTERED) { if (index->type & DICT_CLUSTERED) {
err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_S, err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_S,
thr); type, thr);
} else { } else {
err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_S, err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_S,
thr); type, thr);
} }
return(err); return(err);
@ -656,7 +609,7 @@ row_ins_set_shared_rec_lock(
/******************************************************************* /*******************************************************************
Checks if foreign key constraint fails for an index entry. Sets shared locks Checks if foreign key constraint fails for an index entry. Sets shared locks
which lock either the success or the failure of the constraint. NOTE that which lock either the success or the failure of the constraint. NOTE that
the caller must have a shared latch on dict_foreign_key_check_lock. */ the caller must have a shared latch on dict_operation_lock. */
ulint ulint
row_ins_check_foreign_constraint( row_ins_check_foreign_constraint(
@ -679,7 +632,7 @@ row_ins_check_foreign_constraint(
dict_table_t* check_table; dict_table_t* check_table;
dict_index_t* check_index; dict_index_t* check_index;
ulint n_fields_cmp; ulint n_fields_cmp;
ibool timeout_expired; ibool unique_search;
rec_t* rec; rec_t* rec;
btr_pcur_t pcur; btr_pcur_t pcur;
ibool moved; ibool moved;
@ -689,7 +642,9 @@ row_ins_check_foreign_constraint(
mtr_t mtr; mtr_t mtr;
run_again: run_again:
ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED)); ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
err = DB_SUCCESS;
if (thr_get_trx(thr)->check_foreigns == FALSE) { if (thr_get_trx(thr)->check_foreigns == FALSE) {
/* The user has suppressed foreign key checks currently for /* The user has suppressed foreign key checks currently for
@ -748,6 +703,14 @@ run_again:
dtuple_set_n_fields_cmp(entry, foreign->n_fields); dtuple_set_n_fields_cmp(entry, foreign->n_fields);
if (dict_index_get_n_unique(check_index) <= foreign->n_fields) {
/* We can just set a LOCK_REC_NOT_GAP type lock */
unique_search = TRUE;
} else {
unique_search = FALSE;
}
btr_pcur_open(check_index, entry, PAGE_CUR_GE, btr_pcur_open(check_index, entry, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr); BTR_SEARCH_LEAF, &pcur, &mtr);
@ -761,26 +724,46 @@ run_again:
goto next_rec; goto next_rec;
} }
/* Try to place a lock on the index record */ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
err = row_ins_set_shared_rec_lock(rec, check_index, thr);
err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec,
check_index, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
break; break;
} }
if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
goto next_rec; goto next_rec;
} }
cmp = cmp_dtuple_rec(entry, rec); cmp = cmp_dtuple_rec(entry, rec);
if (cmp == 0) { if (cmp == 0) {
if (!rec_get_deleted_flag(rec)) { if (rec_get_deleted_flag(rec)) {
err = row_ins_set_shared_rec_lock(LOCK_ORDINARY,
rec, check_index, thr);
if (err != DB_SUCCESS) {
break;
}
} else {
/* Found a matching record */ /* Found a matching record */
if (unique_search) {
err = row_ins_set_shared_rec_lock(
LOCK_REC_NOT_GAP,
rec, check_index, thr);
} else {
err = row_ins_set_shared_rec_lock(
LOCK_ORDINARY,
rec, check_index, thr);
}
if (err != DB_SUCCESS) {
break;
}
/* printf( /* printf(
"FOREIGN: Found matching record from %s %s\n", "FOREIGN: Found matching record from %s %s\n",
check_index->table_name, check_index->name); check_index->table_name, check_index->name);
@ -807,6 +790,13 @@ run_again:
} }
if (cmp < 0) { if (cmp < 0) {
err = row_ins_set_shared_rec_lock(LOCK_GAP,
rec, check_index, thr);
if (err != DB_SUCCESS) {
break;
}
if (check_ref) { if (check_ref) {
err = DB_NO_REFERENCED_ROW; err = DB_NO_REFERENCED_ROW;
} else { } else {
@ -844,14 +834,14 @@ do_possible_lock_wait:
que_thr_stop_for_mysql(thr); que_thr_stop_for_mysql(thr);
timeout_expired = srv_suspend_mysql_thread(thr); srv_suspend_mysql_thread(thr);
if (!timeout_expired) { if (thr_get_trx(thr)->error_state == DB_SUCCESS) {
goto run_again; goto run_again;
} }
err = DB_LOCK_WAIT_TIMEOUT; err = thr_get_trx(thr)->error_state;
} }
return(err); return(err);
@ -890,21 +880,21 @@ row_ins_check_foreign_constraints(
trx); trx);
} }
if (!trx->has_dict_foreign_key_check_lock) { if (!trx->has_dict_operation_lock) {
got_s_lock = TRUE; got_s_lock = TRUE;
rw_lock_s_lock(&dict_foreign_key_check_lock); rw_lock_s_lock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = TRUE; trx->has_dict_operation_lock = TRUE;
} }
err = row_ins_check_foreign_constraint(TRUE, foreign, err = row_ins_check_foreign_constraint(TRUE, foreign,
table, index, entry, thr); table, index, entry, thr);
if (got_s_lock) { if (got_s_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock); rw_lock_s_unlock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = FALSE; trx->has_dict_operation_lock = FALSE;
} }
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
@ -918,6 +908,59 @@ row_ins_check_foreign_constraints(
return(DB_SUCCESS); return(DB_SUCCESS);
} }
/*******************************************************************
Tells whether inserting the index entry would violate the uniqueness of
the index because of the given record. A duplicate is reported only if
all the unique fields of the entry match the record, the record is not
delete-marked, and, for a secondary index, none of the unique fields of
the entry is SQL NULL. */
static
ibool
row_ins_dupl_error_with_rec(
/*========================*/
				/* out: TRUE if error */
	rec_t*		rec,	/* in: user record; NOTE that we assume
				that the caller already has a record lock on
				the record! */
	dtuple_t*	entry,	/* in: entry to insert */
	dict_index_t*	index)	/* in: index */
{
	ulint	n_matched_fields	= 0;
	ulint	n_matched_bytes		= 0;
	ulint	n_uniq;
	ulint	field_no;

	n_uniq = dict_index_get_n_unique(index);

	/* Find out how many leading fields of the entry equal the record */
	cmp_dtuple_rec_with_match(entry, rec, &n_matched_fields,
							&n_matched_bytes);

	if (n_matched_fields < n_uniq) {
		/* The unique prefix differs: no conflict */

		return(FALSE);
	}

	if ((index->type & DICT_CLUSTERED) == 0) {
		/* A unique secondary index tolerates equal key values
		as long as the key contains an SQL NULL */

		field_no = 0;

		while (field_no < n_uniq) {
			if (dfield_get_len(dtuple_get_nth_field(entry,
					field_no)) == UNIV_SQL_NULL) {

				return(FALSE);
			}

			field_no++;
		}
	}

	/* A delete-marked record does not count as a duplicate */

	return(rec_get_deleted_flag(rec) ? FALSE : TRUE);
}
/******************************************************************* /*******************************************************************
Scans a unique non-clustered index at a given index entry to determine Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry. whether a uniqueness violation has occurred for the key value of the entry.
@ -978,7 +1021,8 @@ row_ins_scan_sec_index_for_duplicate(
/* Try to place a lock on the index record */ /* Try to place a lock on the index record */
err = row_ins_set_shared_rec_lock(rec, index, thr); err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec, index,
thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
@ -1082,8 +1126,8 @@ row_ins_duplicate_error_in_clust(
sure that in roll-forward we get the same duplicate sure that in roll-forward we get the same duplicate
errors as in original execution */ errors as in original execution */
err = row_ins_set_shared_rec_lock(rec, cursor->index, err = row_ins_set_shared_rec_lock(LOCK_REC_NOT_GAP,
thr); rec, cursor->index, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
return(err); return(err);
@ -1105,8 +1149,8 @@ row_ins_duplicate_error_in_clust(
if (rec != page_get_supremum_rec(page)) { if (rec != page_get_supremum_rec(page)) {
err = row_ins_set_shared_rec_lock(rec, cursor->index, err = row_ins_set_shared_rec_lock(LOCK_REC_NOT_GAP,
thr); rec, cursor->index, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
return(err); return(err);

View File

@ -27,6 +27,7 @@ Created 9/17/2000 Heikki Tuuri
#include "lock0lock.h" #include "lock0lock.h"
#include "rem0cmp.h" #include "rem0cmp.h"
#include "log0log.h" #include "log0log.h"
#include "btr0sea.h"
/* A dummy variable used to fool the compiler */ /* A dummy variable used to fool the compiler */
ibool row_mysql_identically_false = FALSE; ibool row_mysql_identically_false = FALSE;
@ -203,7 +204,6 @@ row_mysql_handle_errors(
que_thr_t* thr, /* in: query thread */ que_thr_t* thr, /* in: query thread */
trx_savept_t* savept) /* in: savepoint or NULL */ trx_savept_t* savept) /* in: savepoint or NULL */
{ {
ibool timeout_expired;
ulint err; ulint err;
handle_new_error: handle_new_error:
@ -240,11 +240,9 @@ handle_new_error:
/* MySQL will roll back the latest SQL statement */ /* MySQL will roll back the latest SQL statement */
} else if (err == DB_LOCK_WAIT) { } else if (err == DB_LOCK_WAIT) {
timeout_expired = srv_suspend_mysql_thread(thr); srv_suspend_mysql_thread(thr);
if (timeout_expired) {
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
if (trx->error_state != DB_SUCCESS) {
que_thr_stop_for_mysql(thr); que_thr_stop_for_mysql(thr);
goto handle_new_error; goto handle_new_error;
@ -1146,7 +1144,7 @@ row_mysql_lock_data_dictionary(void)
/* Serialize data dictionary operations with dictionary mutex: /* Serialize data dictionary operations with dictionary mutex:
no deadlocks or lock waits can occur then in these operations */ no deadlocks or lock waits can occur then in these operations */
rw_lock_x_lock(&(dict_foreign_key_check_lock)); rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex)); mutex_enter(&(dict_sys->mutex));
} }
@ -1161,7 +1159,7 @@ row_mysql_unlock_data_dictionary(void)
no deadlocks can occur then in these operations */ no deadlocks can occur then in these operations */
mutex_exit(&(dict_sys->mutex)); mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock)); rw_lock_x_unlock(&dict_operation_lock);
} }
/************************************************************************* /*************************************************************************
@ -1184,6 +1182,7 @@ row_create_table_for_mysql(
ulint err; ulint err;
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_ad(mutex_own(&(dict_sys->mutex))); ut_ad(mutex_own(&(dict_sys->mutex)));
if (srv_created_new_raw) { if (srv_created_new_raw) {
@ -1384,6 +1383,7 @@ row_create_index_for_mysql(
ulint keywordlen; ulint keywordlen;
ulint err; ulint err;
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_ad(mutex_own(&(dict_sys->mutex))); ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
@ -1464,6 +1464,7 @@ row_table_add_foreign_constraints(
ulint err; ulint err;
ut_ad(mutex_own(&(dict_sys->mutex))); ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_a(sql_string); ut_a(sql_string);
trx->op_info = (char *) "adding foreign keys"; trx->op_info = (char *) "adding foreign keys";
@ -1846,12 +1847,16 @@ row_drop_table_for_mysql(
no deadlocks can occur then in these operations */ no deadlocks can occur then in these operations */
if (!has_dict_mutex) { if (!has_dict_mutex) {
/* Prevent foreign key checks while we are dropping the table */ /* Prevent foreign key checks etc. while we are dropping the
rw_lock_x_lock(&(dict_foreign_key_check_lock)); table */
rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex)); mutex_enter(&(dict_sys->mutex));
} }
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
graph = pars_sql(buf); graph = pars_sql(buf);
ut_a(graph); ut_a(graph);
@ -1861,9 +1866,6 @@ row_drop_table_for_mysql(
graph->fork_type = QUE_FORK_MYSQL_INTERFACE; graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
/* Prevent purge from running while we are dropping the table */
rw_lock_s_lock(&(purge_sys->purge_is_running));
table = dict_table_get_low(name); table = dict_table_get_low(name);
if (!table) { if (!table) {
@ -1945,11 +1947,10 @@ row_drop_table_for_mysql(
} }
} }
funct_exit: funct_exit:
rw_lock_s_unlock(&(purge_sys->purge_is_running));
if (!has_dict_mutex) { if (!has_dict_mutex) {
mutex_exit(&(dict_sys->mutex)); mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock)); rw_lock_x_unlock(&dict_operation_lock);
} }
que_graph_free(graph); que_graph_free(graph);
@ -1985,7 +1986,7 @@ row_drop_database_for_mysql(
trx_start_if_not_started(trx); trx_start_if_not_started(trx);
loop: loop:
rw_lock_x_lock(&(dict_foreign_key_check_lock)); rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex)); mutex_enter(&(dict_sys->mutex));
while ((table_name = dict_get_first_table_name_in_db(name))) { while ((table_name = dict_get_first_table_name_in_db(name))) {
@ -2000,7 +2001,7 @@ loop:
if (table->n_mysql_handles_opened > 0) { if (table->n_mysql_handles_opened > 0) {
mutex_exit(&(dict_sys->mutex)); mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock)); rw_lock_x_unlock(&dict_operation_lock);
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
@ -2028,7 +2029,7 @@ loop:
} }
mutex_exit(&(dict_sys->mutex)); mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock)); rw_lock_x_unlock(&dict_operation_lock);
trx_commit_for_mysql(trx); trx_commit_for_mysql(trx);
@ -2165,7 +2166,7 @@ row_rename_table_for_mysql(
/* Serialize data dictionary operations with dictionary mutex: /* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */ no deadlocks can occur then in these operations */
rw_lock_x_lock(&(dict_foreign_key_check_lock)); rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex)); mutex_enter(&(dict_sys->mutex));
table = dict_table_get_low(old_name); table = dict_table_get_low(old_name);
@ -2249,7 +2250,7 @@ row_rename_table_for_mysql(
} }
funct_exit: funct_exit:
mutex_exit(&(dict_sys->mutex)); mutex_exit(&(dict_sys->mutex));
rw_lock_x_unlock(&(dict_foreign_key_check_lock)); rw_lock_x_unlock(&dict_operation_lock);
que_graph_free(graph); que_graph_free(graph);
@ -2399,9 +2400,19 @@ row_check_table_for_mysql(
ulint n_rows; ulint n_rows;
ulint n_rows_in_table = ULINT_UNDEFINED; ulint n_rows_in_table = ULINT_UNDEFINED;
ulint ret = DB_SUCCESS; ulint ret = DB_SUCCESS;
ulint old_isolation_level;
prebuilt->trx->op_info = (char *) "checking table"; prebuilt->trx->op_info = (char *) "checking table";
old_isolation_level = prebuilt->trx->isolation_level;
/* We must run the index record counts at an isolation level
>= READ COMMITTED, because a dirty read can see a wrong number
of records in some index; to play safe, we use always
REPEATABLE READ here */
prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
index = dict_table_get_first_index(table); index = dict_table_get_first_index(table);
while (index != NULL) { while (index != NULL) {
@ -2433,6 +2444,9 @@ row_check_table_for_mysql(
index = dict_table_get_next_index(index); index = dict_table_get_next_index(index);
} }
/* Restore the original isolation level */
prebuilt->trx->isolation_level = old_isolation_level;
/* We validate also the whole adaptive hash index for all tables /* We validate also the whole adaptive hash index for all tables
at every CHECK TABLE */ at every CHECK TABLE */

View File

@ -453,7 +453,9 @@ static
ibool ibool
row_purge_parse_undo_rec( row_purge_parse_undo_rec(
/*=====================*/ /*=====================*/
/* out: TRUE if purge operation required */ /* out: TRUE if purge operation required:
NOTE that then the CALLER must s-unlock
dict_operation_lock! */
purge_node_t* node, /* in: row undo node */ purge_node_t* node, /* in: row undo node */
ibool* updated_extern, ibool* updated_extern,
/* out: TRUE if an externally stored field /* out: TRUE if an externally stored field
@ -493,18 +495,20 @@ row_purge_parse_undo_rec(
return(FALSE); return(FALSE);
} }
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
rw_lock_s_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex)); mutex_enter(&(dict_sys->mutex));
node->table = dict_table_get_on_id_low(table_id, thr_get_trx(thr)); node->table = dict_table_get_on_id_low(table_id, thr_get_trx(thr));
rw_lock_x_lock(&(purge_sys->purge_is_running));
mutex_exit(&(dict_sys->mutex)); mutex_exit(&(dict_sys->mutex));
if (node->table == NULL) { if (node->table == NULL) {
/* The table has been dropped: no need to do purge */ /* The table has been dropped: no need to do purge */
rw_lock_x_unlock(&(purge_sys->purge_is_running)); rw_lock_s_unlock(&dict_operation_lock);
return(FALSE); return(FALSE);
} }
@ -514,7 +518,7 @@ row_purge_parse_undo_rec(
if (clust_index == NULL) { if (clust_index == NULL) {
/* The table was corrupt in the data dictionary */ /* The table was corrupt in the data dictionary */
rw_lock_x_unlock(&(purge_sys->purge_is_running)); rw_lock_s_unlock(&dict_operation_lock);
return(FALSE); return(FALSE);
} }
@ -573,6 +577,8 @@ row_purge(
} else { } else {
purge_needed = row_purge_parse_undo_rec(node, &updated_extern, purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
thr); thr);
/* If purge_needed == TRUE, we must also remember to unlock
dict_operation_lock! */
} }
if (purge_needed) { if (purge_needed) {
@ -594,7 +600,7 @@ row_purge(
btr_pcur_close(&(node->pcur)); btr_pcur_close(&(node->pcur));
} }
rw_lock_x_unlock(&(purge_sys->purge_is_running)); rw_lock_s_unlock(&dict_operation_lock);
} }
/* Do some cleanup */ /* Do some cleanup */

View File

@ -606,7 +606,7 @@ row_sel_get_clust_rec(
/* Try to place a lock on the index record */ /* Try to place a lock on the index record */
err = lock_clust_rec_read_check_and_lock(0, clust_rec, index, err = lock_clust_rec_read_check_and_lock(0, clust_rec, index,
node->row_lock_mode, thr); node->row_lock_mode, LOCK_ORDINARY, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
return(err); return(err);
@ -678,16 +678,17 @@ sel_set_rec_lock(
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: index */ dict_index_t* index, /* in: index */
ulint mode, /* in: lock mode */ ulint mode, /* in: lock mode */
ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or LOCK_REC_NOT_GAP */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
{ {
ulint err; ulint err;
if (index->type & DICT_CLUSTERED) { if (index->type & DICT_CLUSTERED) {
err = lock_clust_rec_read_check_and_lock(0, rec, index, mode, err = lock_clust_rec_read_check_and_lock(0, rec, index, mode,
thr); type, thr);
} else { } else {
err = lock_sec_rec_read_check_and_lock(0, rec, index, mode, err = lock_sec_rec_read_check_and_lock(0, rec, index, mode,
thr); type, thr);
} }
return(err); return(err);
@ -1154,7 +1155,7 @@ rec_loop:
if (!consistent_read) { if (!consistent_read) {
err = sel_set_rec_lock(page_rec_get_next(rec), index, err = sel_set_rec_lock(page_rec_get_next(rec), index,
node->row_lock_mode, thr); node->row_lock_mode, LOCK_ORDINARY, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
/* Note that in this case we will store in pcur /* Note that in this case we will store in pcur
the PREDECESSOR of the record we are waiting the PREDECESSOR of the record we are waiting
@ -1180,8 +1181,8 @@ rec_loop:
if (!consistent_read) { if (!consistent_read) {
/* Try to place a lock on the index record */ /* Try to place a lock on the index record */
err = sel_set_rec_lock(rec, index, node->row_lock_mode, thr); err = sel_set_rec_lock(rec, index, node->row_lock_mode,
LOCK_ORDINARY, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
goto lock_wait_or_error; goto lock_wait_or_error;
@ -2200,6 +2201,7 @@ row_sel_get_clust_rec_for_mysql(
rec_t* old_vers; rec_t* old_vers;
ulint err; ulint err;
trx_t* trx; trx_t* trx;
char err_buf[1000];
*out_rec = NULL; *out_rec = NULL;
@ -2213,14 +2215,40 @@ row_sel_get_clust_rec_for_mysql(
clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur); clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
ut_ad(page_rec_is_user_rec(clust_rec)); if (!page_rec_is_user_rec(clust_rec)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: error clustered record for sec rec not found\n"
"InnoDB: index %s table %s\n", sec_index->name,
sec_index->table->name);
rec_sprintf(err_buf, 900, rec);
fprintf(stderr, "InnoDB: sec index record %s\n", err_buf);
rec_sprintf(err_buf, 900, clust_rec);
fprintf(stderr, "InnoDB: clust index record %s\n", err_buf);
trx_print(err_buf, trx);
fprintf(stderr,
"%s\nInnoDB: Make a detailed bug report and send it\n",
err_buf);
fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
clust_rec = NULL;
goto func_exit;
}
if (prebuilt->select_lock_type != LOCK_NONE) { if (prebuilt->select_lock_type != LOCK_NONE) {
/* Try to place a lock on the index record */ /* Try to place a lock on the index record; we are searching
the clust rec with a unique condition, hence
we set a LOCK_REC_NOT_GAP type lock */
err = lock_clust_rec_read_check_and_lock(0, clust_rec, err = lock_clust_rec_read_check_and_lock(0, clust_rec,
clust_index, clust_index,
prebuilt->select_lock_type, thr); prebuilt->select_lock_type,
LOCK_REC_NOT_GAP, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
return(err); return(err);
@ -2233,7 +2261,11 @@ row_sel_get_clust_rec_for_mysql(
old_vers = NULL; old_vers = NULL;
if (!lock_clust_rec_cons_read_sees(clust_rec, clust_index, /* If the isolation level allows reading of uncommitted data,
then we never look for an earlier version */
if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
&& !lock_clust_rec_cons_read_sees(clust_rec, clust_index,
trx->read_view)) { trx->read_view)) {
err = row_sel_build_prev_vers_for_mysql( err = row_sel_build_prev_vers_for_mysql(
@ -2275,6 +2307,7 @@ row_sel_get_clust_rec_for_mysql(
} }
} }
func_exit:
*out_rec = clust_rec; *out_rec = clust_rec;
if (prebuilt->select_lock_type == LOCK_X) { if (prebuilt->select_lock_type == LOCK_X) {
@ -2407,7 +2440,7 @@ row_sel_push_cache_row_for_mysql(
/************************************************************************* /*************************************************************************
Tries to do a shortcut to fetch a clustered index record with a unique key, Tries to do a shortcut to fetch a clustered index record with a unique key,
using the hash index if possible (not always). We assume that the search using the hash index if possible (not always). We assume that the search
mode is PAGE_CUR_GE, it is a consistent read, trx has already a read view, mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
btr search latch has been locked in S-mode. */ btr search latch has been locked in S-mode. */
static static
ulint ulint
@ -2516,17 +2549,22 @@ row_search_for_mysql(
ibool was_lock_wait; ibool was_lock_wait;
ulint ret; ulint ret;
ulint shortcut; ulint shortcut;
ibool unique_search = FALSE;
ibool unique_search_from_clust_index = FALSE; ibool unique_search_from_clust_index = FALSE;
ibool mtr_has_extra_clust_latch = FALSE; ibool mtr_has_extra_clust_latch = FALSE;
ibool moves_up = FALSE; ibool moves_up = FALSE;
ibool set_also_gap_locks = TRUE;
/* if the query is a plain
locking SELECT, and the isolation
level is <= TRX_ISO_READ_COMMITTED,
then this is set to FALSE */
ibool success;
ulint cnt = 0; ulint cnt = 0;
mtr_t mtr; mtr_t mtr;
ut_ad(index && pcur && search_tuple); ut_ad(index && pcur && search_tuple);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_ad(sync_thread_levels_empty_gen(FALSE));
if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n" "InnoDB: Error: trying to free a corrupt\n"
@ -2543,6 +2581,9 @@ row_search_for_mysql(
printf("N tables locked %lu\n", trx->mysql_n_tables_locked); printf("N tables locked %lu\n", trx->mysql_n_tables_locked);
*/ */
/*-------------------------------------------------------------*/
/* PHASE 1: Try to pop the row from the prefetch cache */
if (direction == 0) { if (direction == 0) {
trx->op_info = (char *) "starting index read"; trx->op_info = (char *) "starting index read";
@ -2608,18 +2649,35 @@ row_search_for_mysql(
mtr_start(&mtr); mtr_start(&mtr);
/* Since we must release the search system latch when we retrieve an /* In a search where at most one record in the index may match, we
externally stored field, we cannot use the adaptive hash index in a can use a LOCK_REC_NOT_GAP type record lock when locking a non-delete
search in the case the row may be long and there may be externally marked matching record.
stored fields */
Note that in a unique secondary index there may be different delete
marked versions of a record where only the primary key values differ:
thus in a secondary index we must use next-key locks when locking
delete marked records. */
if (match_mode == ROW_SEL_EXACT if (match_mode == ROW_SEL_EXACT
&& index->type & DICT_UNIQUE && index->type & DICT_UNIQUE
&& index->type & DICT_CLUSTERED
&& !prebuilt->templ_contains_blob
&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)
&& dtuple_get_n_fields(search_tuple) && dtuple_get_n_fields(search_tuple)
== dict_index_get_n_unique(index)) { == dict_index_get_n_unique(index)) {
unique_search = TRUE;
}
/*-------------------------------------------------------------*/
/* PHASE 2: Try fast adaptive hash index search if possible */
/* Next test if this is the special case where we can use the fast
adaptive hash index to try the search. Since we must release the
search system latch when we retrieve an externally stored field, we
cannot use the adaptive hash index in a search in the case the row
may be long and there may be externally stored fields */
if (unique_search
&& index->type & DICT_CLUSTERED
&& !prebuilt->templ_contains_blob
&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
if (direction == ROW_SEL_NEXT) { if (direction == ROW_SEL_NEXT) {
/* MySQL sometimes seems to do fetch next even /* MySQL sometimes seems to do fetch next even
@ -2642,8 +2700,9 @@ row_search_for_mysql(
unique_search_from_clust_index = TRUE; unique_search_from_clust_index = TRUE;
if (trx->mysql_n_tables_locked == 0 if (prebuilt->select_lock_type == LOCK_NONE
&& !prebuilt->sql_stat_start) { && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
&& trx->read_view) {
/* This is a SELECT query done as a consistent read, /* This is a SELECT query done as a consistent read,
and the read view has already been allocated: and the read view has already been allocated:
@ -2722,7 +2781,11 @@ row_search_for_mysql(
mtr_start(&mtr); mtr_start(&mtr);
} }
} }
no_shortcut: no_shortcut:
/*-------------------------------------------------------------*/
/* PHASE 3: Open or restore index cursor position */
if (trx->has_search_latch) { if (trx->has_search_latch) {
rw_lock_s_unlock(&btr_search_latch); rw_lock_s_unlock(&btr_search_latch);
trx->has_search_latch = FALSE; trx->has_search_latch = FALSE;
@ -2730,6 +2793,23 @@ no_shortcut:
trx_start_if_not_started(trx); trx_start_if_not_started(trx);
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& prebuilt->select_lock_type != LOCK_NONE
&& trx->mysql_query_str) {
/* Scan the MySQL query string; check if SELECT is the first
word there */
dict_accept(*trx->mysql_query_str, "SELECT", &success);
if (success) {
/* It is a plain locking SELECT and the isolation
level is low: do not lock gaps */
set_also_gap_locks = FALSE;
}
}
/* Note that if the search mode was GE or G, then the cursor /* Note that if the search mode was GE or G, then the cursor
naturally moves upward (in fetch next) in alphabetical order, naturally moves upward (in fetch next) in alphabetical order,
otherwise downward */ otherwise downward */
@ -2793,8 +2873,10 @@ no_shortcut:
prebuilt->sql_stat_start = FALSE; prebuilt->sql_stat_start = FALSE;
} }
/*-------------------------------------------------------------*/
rec_loop: rec_loop:
/*-------------------------------------------------------------*/
/* PHASE 4: Look for matching records in a loop */
cons_read_requires_clust_rec = FALSE; cons_read_requires_clust_rec = FALSE;
rec = btr_pcur_get_rec(pcur); rec = btr_pcur_get_rec(pcur);
@ -2813,21 +2895,23 @@ rec_loop:
goto next_rec; goto next_rec;
} }
if (prebuilt->select_lock_type != LOCK_NONE) { if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
if (prebuilt->select_lock_type != LOCK_NONE
&& set_also_gap_locks) {
/* Try to place a lock on the index record */ /* Try to place a lock on the index record */
err = sel_set_rec_lock(rec, index, prebuilt->select_lock_type, err = sel_set_rec_lock(rec, index,
thr); prebuilt->select_lock_type,
LOCK_ORDINARY, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
goto lock_wait_or_error; goto lock_wait_or_error;
} }
} }
if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
/* A page supremum record cannot be in the result set: skip /* A page supremum record cannot be in the result set: skip
it now when we have placed a possible lock on it */ it now that we have placed a possible lock on it */
goto next_rec; goto next_rec;
} }
@ -2850,6 +2934,19 @@ rec_loop:
if (0 != cmp_dtuple_rec(search_tuple, rec)) { if (0 != cmp_dtuple_rec(search_tuple, rec)) {
if (prebuilt->select_lock_type != LOCK_NONE
&& set_also_gap_locks) {
/* Try to place a lock on the index record */
err = sel_set_rec_lock(rec, index,
prebuilt->select_lock_type,
LOCK_GAP, thr);
if (err != DB_SUCCESS) {
goto lock_wait_or_error;
}
}
btr_pcur_store_position(pcur, &mtr); btr_pcur_store_position(pcur, &mtr);
ret = DB_RECORD_NOT_FOUND; ret = DB_RECORD_NOT_FOUND;
@ -2862,6 +2959,19 @@ rec_loop:
if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec)) { if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec)) {
if (prebuilt->select_lock_type != LOCK_NONE
&& set_also_gap_locks) {
/* Try to place a lock on the index record */
err = sel_set_rec_lock(rec, index,
prebuilt->select_lock_type,
LOCK_GAP, thr);
if (err != DB_SUCCESS) {
goto lock_wait_or_error;
}
}
btr_pcur_store_position(pcur, &mtr); btr_pcur_store_position(pcur, &mtr);
ret = DB_RECORD_NOT_FOUND; ret = DB_RECORD_NOT_FOUND;
@ -2874,16 +2984,39 @@ rec_loop:
/* We are ready to look at a possible new index entry in the result /* We are ready to look at a possible new index entry in the result
set: the cursor is now placed on a user record */ set: the cursor is now placed on a user record */
/* Get the right version of the row in a consistent read */ if (prebuilt->select_lock_type != LOCK_NONE) {
/* Try to place a lock on the index record; note that delete
marked records are a special case in a unique search. If there
is a non-delete marked record, then it is enough to lock its
existence with LOCK_REC_NOT_GAP. */
if (prebuilt->select_lock_type == LOCK_NONE) { if (!set_also_gap_locks
|| (unique_search && !rec_get_deleted_flag(rec))) {
err = sel_set_rec_lock(rec, index,
prebuilt->select_lock_type,
LOCK_REC_NOT_GAP, thr);
} else {
err = sel_set_rec_lock(rec, index,
prebuilt->select_lock_type,
LOCK_ORDINARY, thr);
}
if (err != DB_SUCCESS) {
goto lock_wait_or_error;
}
} else {
/* This is a non-locking consistent read: if necessary, fetch /* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */ a previous version of the record */
cons_read_requires_clust_rec = FALSE; cons_read_requires_clust_rec = FALSE;
if (index == clust_index) { if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
/* Do nothing: we let a non-locking SELECT read the
latest version of the record */
} else if (index == clust_index) {
if (!lock_clust_rec_cons_read_sees(rec, index, if (!lock_clust_rec_cons_read_sees(rec, index,
trx->read_view)) { trx->read_view)) {
@ -3020,8 +3153,11 @@ got_row:
ret = DB_SUCCESS; ret = DB_SUCCESS;
goto normal_return; goto normal_return;
/*-------------------------------------------------------------*/
next_rec: next_rec:
/*-------------------------------------------------------------*/
/* PHASE 5: Move the cursor to the next index record */
if (mtr_has_extra_clust_latch) { if (mtr_has_extra_clust_latch) {
/* We must commit mtr if we are moving to the next /* We must commit mtr if we are moving to the next
non-clustered index record, because we could break the non-clustered index record, because we could break the
@ -3064,8 +3200,10 @@ next_rec:
cnt++; cnt++;
goto rec_loop; goto rec_loop;
/*-------------------------------------------------------------*/
lock_wait_or_error: lock_wait_or_error:
/*-------------------------------------------------------------*/
btr_pcur_store_position(pcur, &mtr); btr_pcur_store_position(pcur, &mtr);
mtr_commit(&mtr); mtr_commit(&mtr);
@ -3096,6 +3234,7 @@ lock_wait_or_error:
return(err); return(err);
normal_return: normal_return:
/*-------------------------------------------------------------*/
que_thr_stop_for_mysql_no_error(thr, trx); que_thr_stop_for_mysql_no_error(thr, trx);
mtr_commit(&mtr); mtr_commit(&mtr);
@ -3156,10 +3295,12 @@ row_search_check_if_query_cache_permitted(
ret = TRUE; ret = TRUE;
/* Assign a read view for the transaction if it does not yet /* If the isolation level is high, assign a read view for the
have one */ transaction if it does not yet have one */
if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
&& !trx->read_view) {
if (!trx->read_view) {
trx->read_view = read_view_open_now(trx, trx->read_view = read_view_open_now(trx,
trx->read_view_heap); trx->read_view_heap);
} }

View File

@ -254,6 +254,7 @@ row_undo_ins_parse_undo_rec(
node->table = dict_table_get_on_id(table_id, node->trx); node->table = dict_table_get_on_id(table_id, node->trx);
if (node->table == NULL) { if (node->table == NULL) {
return; return;
} }
@ -292,6 +293,7 @@ row_undo_ins(
if (!found) { if (!found) {
trx_undo_rec_release(node->trx, node->undo_no); trx_undo_rec_release(node->trx, node->undo_no);
return(DB_SUCCESS); return(DB_SUCCESS);
} }

View File

@ -211,7 +211,6 @@ row_undo(
if (node->state == UNDO_NODE_FETCH_NEXT) { if (node->state == UNDO_NODE_FETCH_NEXT) {
/* The call below also starts &mtr */
node->undo_rec = trx_roll_pop_top_rec_of_trx(trx, node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
trx->roll_limit, trx->roll_limit,
&roll_ptr, &roll_ptr,
@ -254,6 +253,10 @@ row_undo(
} }
} }
/* Prevent DROP TABLE etc. while we are rolling back this row */
rw_lock_s_lock(&dict_operation_lock);
if (node->state == UNDO_NODE_INSERT) { if (node->state == UNDO_NODE_INSERT) {
err = row_undo_ins(node, thr); err = row_undo_ins(node, thr);
@ -264,6 +267,8 @@ row_undo(
err = row_undo_mod(node, thr); err = row_undo_mod(node, thr);
} }
rw_lock_s_unlock(&dict_operation_lock);
/* Do some cleanup */ /* Do some cleanup */
btr_pcur_close(&(node->pcur)); btr_pcur_close(&(node->pcur));

View File

@ -79,7 +79,7 @@ ibool
row_upd_index_is_referenced( row_upd_index_is_referenced(
/*========================*/ /*========================*/
/* out: TRUE if referenced; NOTE that since /* out: TRUE if referenced; NOTE that since
we do not hold dict_foreign_key_check_lock we do not hold dict_operation_lock
when leaving the function, it may be that when leaving the function, it may be that
the referencing table has been dropped when the referencing table has been dropped when
we leave this function: this function is only we leave this function: this function is only
@ -95,8 +95,8 @@ row_upd_index_is_referenced(
return(FALSE); return(FALSE);
} }
if (!trx->has_dict_foreign_key_check_lock) { if (!trx->has_dict_operation_lock) {
rw_lock_s_lock(&dict_foreign_key_check_lock); rw_lock_s_lock(&dict_operation_lock);
} }
foreign = UT_LIST_GET_FIRST(table->referenced_list); foreign = UT_LIST_GET_FIRST(table->referenced_list);
@ -104,8 +104,8 @@ row_upd_index_is_referenced(
while (foreign) { while (foreign) {
if (foreign->referenced_index == index) { if (foreign->referenced_index == index) {
if (!trx->has_dict_foreign_key_check_lock) { if (!trx->has_dict_operation_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock); rw_lock_s_unlock(&dict_operation_lock);
} }
return(TRUE); return(TRUE);
@ -114,8 +114,8 @@ row_upd_index_is_referenced(
foreign = UT_LIST_GET_NEXT(referenced_list, foreign); foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
} }
if (!trx->has_dict_foreign_key_check_lock) { if (!trx->has_dict_operation_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock); rw_lock_s_unlock(&dict_operation_lock);
} }
return(FALSE); return(FALSE);
@ -162,12 +162,12 @@ row_upd_check_references_constraints(
mtr_start(mtr); mtr_start(mtr);
if (!trx->has_dict_foreign_key_check_lock) { if (!trx->has_dict_operation_lock) {
got_s_lock = TRUE; got_s_lock = TRUE;
rw_lock_s_lock(&dict_foreign_key_check_lock); rw_lock_s_lock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = TRUE; trx->has_dict_operation_lock = TRUE;
} }
foreign = UT_LIST_GET_FIRST(table->referenced_list); foreign = UT_LIST_GET_FIRST(table->referenced_list);
@ -189,7 +189,7 @@ row_upd_check_references_constraints(
} }
/* NOTE that if the thread ends up waiting for a lock /* NOTE that if the thread ends up waiting for a lock
we will release dict_foreign_key_check_lock we will release dict_operation_lock
temporarily! But the counter on the table temporarily! But the counter on the table
protects 'foreign' from being dropped while the check protects 'foreign' from being dropped while the check
is running. */ is running. */
@ -212,8 +212,8 @@ row_upd_check_references_constraints(
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
if (got_s_lock) { if (got_s_lock) {
rw_lock_s_unlock( rw_lock_s_unlock(
&dict_foreign_key_check_lock); &dict_operation_lock);
trx->has_dict_foreign_key_check_lock trx->has_dict_operation_lock
= FALSE; = FALSE;
} }
@ -227,8 +227,8 @@ row_upd_check_references_constraints(
} }
if (got_s_lock) { if (got_s_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock); rw_lock_s_unlock(&dict_operation_lock);
trx->has_dict_foreign_key_check_lock = FALSE; trx->has_dict_operation_lock = FALSE;
} }
mem_heap_free(heap); mem_heap_free(heap);

View File

@ -136,8 +136,6 @@ byte srv_latin1_ordering[256] /* The sort order table of the latin1
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
}; };
ibool srv_use_native_aio = FALSE;
ulint srv_pool_size = ULINT_MAX; /* size in database pages; ulint srv_pool_size = ULINT_MAX; /* size in database pages;
MySQL originally sets this MySQL originally sets this
value in megabytes */ value in megabytes */
@ -151,8 +149,9 @@ dulint srv_archive_recovery_limit_lsn;
ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
char* srv_unix_file_flush_method_str = NULL; char* srv_file_flush_method_str = NULL;
ulint srv_unix_file_flush_method = 0; ulint srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
/* If the following is != 0 we do not allow inserts etc. This protects /* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */ the user from forgetting the innodb_force_recovery keyword to my.cnf */
@ -281,6 +280,9 @@ time_t srv_last_monitor_time;
mutex_t srv_innodb_monitor_mutex; mutex_t srv_innodb_monitor_mutex;
ulint srv_main_thread_process_no = 0;
ulint srv_main_thread_id = 0;
/* /*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM IMPLEMENTATION OF THE SERVER MAIN PROGRAM
========================================= =========================================
@ -2046,13 +2048,15 @@ srv_table_reserve_slot_for_mysql(void)
} }
/******************************************************************* /*******************************************************************
Puts a MySQL OS thread to wait for a lock to be released. */ Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
ibool void
srv_suspend_mysql_thread( srv_suspend_mysql_thread(
/*=====================*/ /*=====================*/
/* out: TRUE if the lock wait timeout was
exceeded */
que_thr_t* thr) /* in: query thread associated with the MySQL que_thr_t* thr) /* in: query thread associated with the MySQL
OS thread */ OS thread */
{ {
@ -2069,13 +2073,15 @@ srv_suspend_mysql_thread(
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);
trx->error_state = DB_SUCCESS;
if (thr->state == QUE_THR_RUNNING) { if (thr->state == QUE_THR_RUNNING) {
/* The lock has already been released: no need to suspend */ /* The lock has already been released: no need to suspend */
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);
return(FALSE); return;
} }
slot = srv_table_reserve_slot_for_mysql(); slot = srv_table_reserve_slot_for_mysql();
@ -2101,18 +2107,18 @@ srv_suspend_mysql_thread(
srv_conc_force_exit_innodb(thr_get_trx(thr)); srv_conc_force_exit_innodb(thr_get_trx(thr));
/* Release possible foreign key check latch */ /* Release possible foreign key check latch */
if (trx->has_dict_foreign_key_check_lock) { if (trx->has_dict_operation_lock) {
rw_lock_s_unlock(&dict_foreign_key_check_lock); rw_lock_s_unlock(&dict_operation_lock);
} }
/* Wait for the release */ /* Wait for the release */
os_event_wait(event); os_event_wait(event);
if (trx->has_dict_foreign_key_check_lock) { if (trx->has_dict_operation_lock) {
rw_lock_s_lock(&dict_foreign_key_check_lock); rw_lock_s_lock(&dict_operation_lock);
} }
/* Return back inside InnoDB */ /* Return back inside InnoDB */
@ -2131,10 +2137,9 @@ srv_suspend_mysql_thread(
if (srv_lock_wait_timeout < 100000000 && if (srv_lock_wait_timeout < 100000000 &&
wait_time > (double)srv_lock_wait_timeout) { wait_time > (double)srv_lock_wait_timeout) {
return(TRUE);
}
return(FALSE); trx->error_state = DB_LOCK_WAIT_TIMEOUT;
}
} }
/************************************************************************ /************************************************************************
@ -2300,9 +2305,19 @@ srv_sprintf_innodb_monitor(
"ROW OPERATIONS\n" "ROW OPERATIONS\n"
"--------------\n"); "--------------\n");
buf += sprintf(buf, buf += sprintf(buf,
"%ld queries inside InnoDB, %ld queries in queue; main thread: %s\n", "%ld queries inside InnoDB, %ld queries in queue\n",
srv_conc_n_threads, srv_conc_n_waiting_threads, srv_conc_n_threads, srv_conc_n_waiting_threads);
#ifdef UNIV_LINUX
buf += sprintf(buf,
"Main thread process no %lu, state: %s\n",
srv_main_thread_process_no,
srv_main_thread_op_info); srv_main_thread_op_info);
#else
buf += sprintf(buf,
"Main thread id %lu, state: %s\n",
srv_main_thread_id,
srv_main_thread_op_info);
#endif
buf += sprintf(buf, buf += sprintf(buf,
"Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n",
srv_n_rows_inserted, srv_n_rows_inserted,
@ -2636,6 +2651,9 @@ srv_master_thread(
UT_NOT_USED(arg); UT_NOT_USED(arg);
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
srv_table_reserve_slot(SRV_MASTER); srv_table_reserve_slot(SRV_MASTER);
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);

View File

@ -515,7 +515,7 @@ srv_calc_high32(
} }
/************************************************************************* /*************************************************************************
Creates or opens the log files. */ Creates or opens the log files and closes them. */
static static
ulint ulint
open_or_create_log_file( open_or_create_log_file(
@ -640,7 +640,7 @@ open_or_create_log_file(
} }
/************************************************************************* /*************************************************************************
Creates or opens database data files. */ Creates or opens database data files and closes them. */
static static
ulint ulint
open_or_create_data_files( open_or_create_data_files(
@ -965,31 +965,63 @@ innobase_start_or_create_for_mysql(void)
srv_is_being_started = TRUE; srv_is_being_started = TRUE;
srv_startup_is_before_trx_rollback_phase = TRUE; srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE;
#ifdef __WIN__
if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31
|| os_get_os_version() == OS_WINNT) {
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
} else {
/* On Win 2000 and XP use async i/o */
os_aio_use_native_aio = TRUE;
}
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) {
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC; srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "O_DSYNC")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fdatasync")) {
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC; srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, } else if (0 == ut_strcmp(srv_file_flush_method_str,
"littlesync")) { "littlesync")) {
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC; srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
#else
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
os_aio_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#endif
} else { } else {
fprintf(stderr, fprintf(stderr,
"InnoDB: Unrecognized value %s for innodb_flush_method\n", "InnoDB: Unrecognized value %s for innodb_flush_method\n",
srv_unix_file_flush_method_str); srv_file_flush_method_str);
return(DB_ERROR); return(DB_ERROR);
} }
/*
printf("srv_unix set to %lu\n", srv_unix_file_flush_method);
*/
os_aio_use_native_aio = srv_use_native_aio;
err = srv_boot(); err = srv_boot();
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
@ -999,34 +1031,15 @@ innobase_start_or_create_for_mysql(void)
/* Restrict the maximum number of file i/o threads */ /* Restrict the maximum number of file i/o threads */
if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
} }
#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO)) if (!os_aio_use_native_aio) {
/* In simulated aio we currently have use only for 4 threads */ /* In simulated aio we currently have use only for 4 threads */
os_aio_use_native_aio = FALSE;
srv_n_file_io_threads = 4; srv_n_file_io_threads = 4;
#endif
#ifdef __WIN__
if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31) {
/* On Win 95, 98, ME, and Win32 subsystem for Windows 3.1 use
simulated aio */
os_aio_use_native_aio = FALSE;
srv_n_file_io_threads = 4;
} else {
/* On NT and Win 2000 always use aio */
os_aio_use_native_aio = TRUE;
}
#endif
os_aio_use_native_aio = FALSE;
if (!os_aio_use_native_aio) {
os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
* srv_n_file_io_threads, * srv_n_file_io_threads,
srv_n_file_io_threads, srv_n_file_io_threads,
@ -1047,15 +1060,6 @@ innobase_start_or_create_for_mysql(void)
lock_sys_create(srv_lock_table_size); lock_sys_create(srv_lock_table_size);
#ifdef POSIX_ASYNC_IO
if (os_aio_use_native_aio) {
/* There is only one thread per async io array:
one for ibuf i/o, one for log i/o, one for ordinary reads,
one for ordinary writes; we need only 4 i/o threads */
srv_n_file_io_threads = 4;
}
#endif
/* Create i/o-handler threads: */ /* Create i/o-handler threads: */
for (i = 0; i < srv_n_file_io_threads; i++) { for (i = 0; i < srv_n_file_io_threads; i++) {

View File

@ -663,7 +663,8 @@ rw_lock_own(
/*========*/ /*========*/
/* out: TRUE if locked */ /* out: TRUE if locked */
rw_lock_t* lock, /* in: rw-lock */ rw_lock_t* lock, /* in: rw-lock */
ulint lock_type) /* in: lock type */ ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
{ {
rw_lock_debug_t* info; rw_lock_debug_t* info;

View File

@ -901,8 +901,7 @@ sync_thread_levels_empty_gen(
if (slot->latch != NULL && (!dict_mutex_allowed || if (slot->latch != NULL && (!dict_mutex_allowed ||
(slot->level != SYNC_DICT (slot->level != SYNC_DICT
&& slot->level != SYNC_FOREIGN_KEY_CHECK && slot->level != SYNC_DICT_OPERATION))) {
&& slot->level != SYNC_PURGE_IS_RUNNING))) {
lock = slot->latch; lock = slot->latch;
mutex = slot->latch; mutex = slot->latch;
@ -1087,12 +1086,10 @@ sync_thread_add_level(
SYNC_IBUF_PESS_INSERT_MUTEX)); SYNC_IBUF_PESS_INSERT_MUTEX));
} else if (level == SYNC_DICT_AUTOINC_MUTEX) { } else if (level == SYNC_DICT_AUTOINC_MUTEX) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX)); ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
} else if (level == SYNC_FOREIGN_KEY_CHECK) { } else if (level == SYNC_DICT_OPERATION) {
ut_a(sync_thread_levels_g(array, SYNC_FOREIGN_KEY_CHECK)); ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
} else if (level == SYNC_DICT_HEADER) { } else if (level == SYNC_DICT_HEADER) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER)); ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
} else if (level == SYNC_PURGE_IS_RUNNING) {
ut_a(sync_thread_levels_g(array, SYNC_PURGE_IS_RUNNING));
} else if (level == SYNC_DICT) { } else if (level == SYNC_DICT) {
ut_a(buf_debug_prints ut_a(buf_debug_prints
|| sync_thread_levels_g(array, SYNC_DICT)); || sync_thread_levels_g(array, SYNC_DICT));

View File

@ -209,9 +209,6 @@ trx_purge_sys_create(void)
purge_sys->purge_undo_no = ut_dulint_zero; purge_sys->purge_undo_no = ut_dulint_zero;
purge_sys->next_stored = FALSE; purge_sys->next_stored = FALSE;
rw_lock_create(&(purge_sys->purge_is_running));
rw_lock_set_level(&(purge_sys->purge_is_running),
SYNC_PURGE_IS_RUNNING);
rw_lock_create(&(purge_sys->latch)); rw_lock_create(&(purge_sys->latch));
rw_lock_set_level(&(purge_sys->latch), SYNC_PURGE_LATCH); rw_lock_set_level(&(purge_sys->latch), SYNC_PURGE_LATCH);

View File

@ -23,7 +23,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h" #include "srv0srv.h"
#include "thr0loc.h" #include "thr0loc.h"
#include "btr0sea.h" #include "btr0sea.h"
#include "os0proc.h"
/* Copy of the prototype for innobase_mysql_print_thd: this /* Copy of the prototype for innobase_mysql_print_thd: this
copy MUST be equal to the one in mysql/sql/ha_innobase.cc ! */ copy MUST be equal to the one in mysql/sql/ha_innobase.cc ! */
@ -85,12 +85,14 @@ trx_create(
trx->conc_state = TRX_NOT_STARTED; trx->conc_state = TRX_NOT_STARTED;
trx->start_time = time(NULL); trx->start_time = time(NULL);
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
trx->check_foreigns = TRUE; trx->check_foreigns = TRUE;
trx->check_unique_secondary = TRUE; trx->check_unique_secondary = TRUE;
trx->dict_operation = FALSE; trx->dict_operation = FALSE;
trx->mysql_thd = NULL; trx->mysql_thd = NULL;
trx->mysql_query_str = NULL;
trx->n_mysql_tables_in_use = 0; trx->n_mysql_tables_in_use = 0;
trx->mysql_n_tables_locked = 0; trx->mysql_n_tables_locked = 0;
@ -132,7 +134,7 @@ trx_create(
trx->lock_heap = mem_heap_create_in_buffer(256); trx->lock_heap = mem_heap_create_in_buffer(256);
UT_LIST_INIT(trx->trx_locks); UT_LIST_INIT(trx->trx_locks);
trx->has_dict_foreign_key_check_lock = FALSE; trx->has_dict_operation_lock = FALSE;
trx->has_search_latch = FALSE; trx->has_search_latch = FALSE;
trx->search_latch_timeout = BTR_SEA_TIMEOUT; trx->search_latch_timeout = BTR_SEA_TIMEOUT;
@ -176,6 +178,8 @@ trx_allocate_for_mysql(void)
trx->mysql_thread_id = os_thread_get_curr_id(); trx->mysql_thread_id = os_thread_get_curr_id();
trx->mysql_process_no = os_proc_get_number();
return(trx); return(trx);
} }
@ -1497,9 +1501,12 @@ trx_print(
default: buf += sprintf(buf, " state %lu", trx->conc_state); default: buf += sprintf(buf, " state %lu", trx->conc_state);
} }
#ifdef UNIV_LINUX
buf += sprintf(buf, ", process no %lu", trx->mysql_process_no);
#else
buf += sprintf(buf, ", OS thread id %lu", buf += sprintf(buf, ", OS thread id %lu",
os_thread_pf(trx->mysql_thread_id)); os_thread_pf(trx->mysql_thread_id));
#endif
if (ut_strlen(trx->op_info) > 0) { if (ut_strlen(trx->op_info) > 0) {
buf += sprintf(buf, " %s", trx->op_info); buf += sprintf(buf, " %s", trx->op_info);
} }

View File

@ -97,6 +97,8 @@ are determined in innobase_init below: */
char* innobase_data_home_dir = NULL; char* innobase_data_home_dir = NULL;
char* innobase_log_group_home_dir = NULL; char* innobase_log_group_home_dir = NULL;
char* innobase_log_arch_dir = NULL; char* innobase_log_arch_dir = NULL;
/* The following has a misleading name: starting from 4.0.5 this also
affects Windows */
char* innobase_unix_file_flush_method = NULL; char* innobase_unix_file_flush_method = NULL;
/* Below we have boolean-valued start-up parameters, and their default /* Below we have boolean-valued start-up parameters, and their default
@ -346,6 +348,7 @@ check_trx_exists(
trx = trx_allocate_for_mysql(); trx = trx_allocate_for_mysql();
trx->mysql_thd = thd; trx->mysql_thd = thd;
trx->mysql_query_str = &((*thd).query);
thd->transaction.all.innobase_tid = trx; thd->transaction.all.innobase_tid = trx;
@ -713,9 +716,10 @@ innobase_init(void)
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
} }
srv_unix_file_flush_method_str = (innobase_unix_file_flush_method ?
srv_file_flush_method_str = (innobase_unix_file_flush_method ?
innobase_unix_file_flush_method : innobase_unix_file_flush_method :
(char*)"fdatasync"); NULL);
srv_n_log_groups = (ulint) innobase_mirrored_log_groups; srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
srv_n_log_files = (ulint) innobase_log_files_in_group; srv_n_log_files = (ulint) innobase_log_files_in_group;
@ -725,8 +729,6 @@ innobase_init(void)
srv_log_buffer_size = (ulint) innobase_log_buffer_size; srv_log_buffer_size = (ulint) innobase_log_buffer_size;
srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit; srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
srv_use_native_aio = 0;
srv_pool_size = (ulint) innobase_buffer_pool_size; srv_pool_size = (ulint) innobase_buffer_pool_size;
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
@ -2179,8 +2181,16 @@ convert_search_mode_to_innobase(
case HA_READ_AFTER_KEY: return(PAGE_CUR_G); case HA_READ_AFTER_KEY: return(PAGE_CUR_G);
case HA_READ_BEFORE_KEY: return(PAGE_CUR_L); case HA_READ_BEFORE_KEY: return(PAGE_CUR_L);
case HA_READ_PREFIX: return(PAGE_CUR_GE); case HA_READ_PREFIX: return(PAGE_CUR_GE);
case HA_READ_PREFIX_LAST: return(PAGE_CUR_LE); case HA_READ_PREFIX_LAST:
/* HA_READ_PREFIX_LAST does not yet work in InnoDB! */ /* ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: Using HA_READ_PREFIX_LAST\n"); */
return(PAGE_CUR_LE);
/* InnoDB does not yet support ..PREFIX_LAST!
We have to add a new search flag
PAGE_CUR_LE_OR_PREFIX to InnoDB. */
/* the above PREFIX flags mean that the last /* the above PREFIX flags mean that the last
field in the key value may just be a prefix field in the key value may just be a prefix
of the complete fixed length field */ of the complete fixed length field */
@ -3639,7 +3649,6 @@ ha_innobase::reset(void)
return(0); return(0);
} }
/********************************************************************** /**********************************************************************
When we create a temporary table inside MySQL LOCK TABLES, MySQL will When we create a temporary table inside MySQL LOCK TABLES, MySQL will
not call external_lock for the temporary table when it uses it. Instead, not call external_lock for the temporary table when it uses it. Instead,
@ -3661,6 +3670,14 @@ ha_innobase::start_stmt(
innobase_release_stat_resources(trx); innobase_release_stat_resources(trx);
trx_mark_sql_stat_end(trx); trx_mark_sql_stat_end(trx);
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& trx->read_view) {
/* At low transaction isolation levels we let
each consistent read set its own snapshot */
read_view_close_for_mysql(trx);
}
auto_inc_counter_for_this_stat = 0; auto_inc_counter_for_this_stat = 0;
prebuilt->sql_stat_start = TRUE; prebuilt->sql_stat_start = TRUE;
prebuilt->hint_no_need_to_fetch_extra_cols = TRUE; prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
@ -3680,6 +3697,24 @@ ha_innobase::start_stmt(
return(0); return(0);
} }
/**********************************************************************
Translates a MySQL transaction isolation level code into the matching
InnoDB isolation level code. An unrecognized MySQL code trips the
ut_a() assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	ulint	innodb_level = 0;

	if (iso == ISO_READ_UNCOMMITTED) {
		innodb_level = TRX_ISO_READ_UNCOMMITTED;
	} else if (iso == ISO_READ_COMMITTED) {
		innodb_level = TRX_ISO_READ_COMMITTED;
	} else if (iso == ISO_REPEATABLE_READ) {
		innodb_level = TRX_ISO_REPEATABLE_READ;
	} else if (iso == ISO_SERIALIZABLE) {
		innodb_level = TRX_ISO_SERIALIZABLE;
	} else {
		/* No InnoDB counterpart is known for this code */
		ut_a(0);
	}

	return(innodb_level);
}
/********************************************************************** /**********************************************************************
As MySQL will execute an external lock for every new table it uses when it As MySQL will execute an external lock for every new table it uses when it
starts to process an SQL statement (an exception is when MySQL calls starts to process an SQL statement (an exception is when MySQL calls
@ -3726,7 +3761,13 @@ ha_innobase::external_lock(
thd->transaction.all.innodb_active_trans = 1; thd->transaction.all.innodb_active_trans = 1;
trx->n_mysql_tables_in_use++; trx->n_mysql_tables_in_use++;
if (thd->variables.tx_isolation == ISO_SERIALIZABLE if (thd->variables.tx_isolation != ISO_REPEATABLE_READ) {
trx->isolation_level = innobase_map_isolation_level(
(enum_tx_isolation)
thd->variables.tx_isolation);
}
if (trx->isolation_level == TRX_ISO_SERIALIZABLE
&& prebuilt->select_lock_type == LOCK_NONE) { && prebuilt->select_lock_type == LOCK_NONE) {
/* To get serializable execution we let InnoDB /* To get serializable execution we let InnoDB
@ -3753,6 +3794,15 @@ ha_innobase::external_lock(
innobase_release_stat_resources(trx); innobase_release_stat_resources(trx);
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& trx->read_view) {
/* At low transaction isolation levels we let
each consistent read set its own snapshot */
read_view_close_for_mysql(trx);
}
if (!(thd->options if (!(thd->options
& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
@ -3779,7 +3829,6 @@ innodb_show_status(
DBUG_ENTER("innodb_show_status"); DBUG_ENTER("innodb_show_status");
if (innodb_skip) { if (innodb_skip) {
fprintf(stderr, fprintf(stderr,
"Cannot call SHOW INNODB STATUS because skip-innodb is defined\n"); "Cannot call SHOW INNODB STATUS because skip-innodb is defined\n");

View File

@ -96,7 +96,7 @@ class ha_innobase: public handler
ulong index_flags(uint idx) const ulong index_flags(uint idx) const
{ {
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
HA_KEY_READ_ONLY | HA_NOT_READ_PREFIX_LAST); HA_KEY_READ_ONLY);
} }
uint max_record_length() const { return HA_MAX_REC_LENGTH; } uint max_record_length() const { return HA_MAX_REC_LENGTH; }
uint max_keys() const { return MAX_KEY; } uint max_keys() const { return MAX_KEY; }

View File

@ -3879,7 +3879,7 @@ static void set_options(void)
/* Set default values for some variables */ /* Set default values for some variables */
global_system_variables.table_type=DB_TYPE_MYISAM; global_system_variables.table_type=DB_TYPE_MYISAM;
global_system_variables.tx_isolation=ISO_READ_COMMITTED; global_system_variables.tx_isolation=ISO_REPEATABLE_READ;
global_system_variables.select_limit= (ulong) HA_POS_ERROR; global_system_variables.select_limit= (ulong) HA_POS_ERROR;
max_system_variables.select_limit= (ulong) HA_POS_ERROR; max_system_variables.select_limit= (ulong) HA_POS_ERROR;
global_system_variables.max_join_size= (ulong) HA_POS_ERROR; global_system_variables.max_join_size= (ulong) HA_POS_ERROR;
@ -4351,7 +4351,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
} }
global_system_variables.tx_isolation= ((opt_sql_mode & MODE_SERIALIZABLE) ? global_system_variables.tx_isolation= ((opt_sql_mode & MODE_SERIALIZABLE) ?
ISO_SERIALIZABLE : ISO_SERIALIZABLE :
ISO_READ_COMMITTED); ISO_REPEATABLE_READ);
break; break;
} }
case OPT_MASTER_PASSWORD: case OPT_MASTER_PASSWORD: