From 4819f65a8b1751f126c8ef79d8cfced5ada8f50f Mon Sep 17 00:00:00 2001 From: Aleksandr Kuzminsky Date: Wed, 4 Nov 2009 12:11:12 -0800 Subject: [PATCH] Sync with rev. 114 --- buf/buf0buddy.c | 7 +- buf/buf0buf.c | 106 ++++++++++- buf/buf0flu.c | 25 +-- buf/buf0lru.c | 15 +- buf/buf0rea.c | 34 ++-- dict/dict0dict.c | 2 +- fil/fil0fil.c | 9 +- handler/ha_innodb.cc | 215 ++++++++++++++++++---- handler/handler0alter.cc | 17 ++ handler/i_s.cc | 137 ++++++++++++++ handler/i_s.h | 1 + handler/innodb_patch_info.h | 3 + include/buf0buf.ic | 8 +- include/buf0rea.h | 7 +- include/fil0fil.h | 8 +- include/os0file.h | 12 +- include/srv0srv.h | 13 +- include/sync0rw.h | 9 +- include/sync0sync.h | 8 +- include/trx0purge.h | 24 +++ include/trx0trx.h | 11 ++ lock/lock0lock.c | 12 ++ log/log0log.c | 10 + mysql-test/innodb-index.result | 1 + mysql-test/innodb-index.test | 2 + mysql-test/innodb_file_format.result | 4 +- mysql-test/innodb_file_format.test | 16 +- mysql-test/innodb_xtradb_bug317074.result | 1 - mysql-test/innodb_xtradb_bug317074.test | 4 +- mysql-test/patches/disabled.def.diff | 11 +- mysql-test/patches/innodb_bug46000.diff | 26 +++ os/os0file.c | 60 +++++- row/row0mysql.c | 10 - srv/srv0srv.c | 107 ++++++++++- srv/srv0start.c | 13 +- sync/sync0arr.c | 9 +- sync/sync0rw.c | 35 ++-- sync/sync0sync.c | 19 +- trx/trx0purge.c | 86 ++++++++- trx/trx0trx.c | 42 +++++ 40 files changed, 965 insertions(+), 174 deletions(-) create mode 100644 mysql-test/patches/innodb_bug46000.diff diff --git a/buf/buf0buddy.c b/buf/buf0buddy.c index 6ee7a71a2e5..586a8dee1a2 100644 --- a/buf/buf0buddy.c +++ b/buf/buf0buddy.c @@ -531,11 +531,10 @@ buf_buddy_relocate( UNIV_MEM_ASSERT_W(src, size); mutex = buf_page_get_mutex_enter(bpage); - ut_a(mutex); mutex_enter(&zip_free_mutex); - if (buf_page_can_relocate(bpage)) { + if (mutex && buf_page_can_relocate(bpage)) { /* Relocate the compressed page. 
*/ ut_a(bpage->zip.data == src); memcpy(dst, src, size); @@ -563,7 +562,9 @@ success: rw_lock_x_unlock(&page_hash_latch); } - mutex_exit(mutex); + if (mutex) { + mutex_exit(mutex); + } } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) { /* This must be a buf_page_t object. */ UNIV_MEM_ASSERT_RW(src, size); diff --git a/buf/buf0buf.c b/buf/buf0buf.c index 8da0a87751d..dd17f6faf21 100644 --- a/buf/buf0buf.c +++ b/buf/buf0buf.c @@ -52,6 +52,39 @@ Created 11/5/1995 Heikki Tuuri #include "log0recv.h" #include "page0zip.h" +/* prototypes for new functions added to ha_innodb.cc */ +trx_t* innobase_get_trx(); + +static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) /* sets the page's bit in trx->distinct_page_access_hash and bumps trx->distinct_page_access the first time that bit is set; no-op unless slow-log stats are enabled for trx */ +{ + ulint block_hash; + ulint block_hash_byte; + byte block_hash_offset; + + ut_ad(block); + + if (!innobase_get_slow_log() || !trx || !trx->take_stats) + return; + + if (!trx->distinct_page_access_hash) { + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); + } + + block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space + + block->page.offset, DPAH_SIZE << 3); + block_hash_byte = block_hash >> 3; + block_hash_offset = (byte) block_hash & 0x07; + if (block_hash_byte >= DPAH_SIZE) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + if (block_hash_offset > 7) + fprintf(stderr, "!!!
block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) + trx->distinct_page_access++; + trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; + return; +} + /* IMPLEMENTATION OF THE BUFFER POOL ================================= @@ -1696,10 +1729,18 @@ buf_page_get_zip( buf_page_t* bpage; mutex_t* block_mutex; ibool must_read; + trx_t* trx = NULL; + ulint sec; + ulint ms; + ib_uint64_t start_time; + ib_uint64_t finish_time; #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside()); #endif + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + } buf_pool->n_page_gets++; for (;;) { @@ -1716,7 +1757,7 @@ lookup: //buf_pool_mutex_exit(); rw_lock_s_unlock(&page_hash_latch); - buf_read_page(space, zip_size, offset); + buf_read_page(space, zip_size, offset, trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 37 || buf_validate()); @@ -1793,6 +1834,13 @@ got_block: /* Let us wait until the read operation completes */ + if (innobase_get_slow_log() && trx && trx->take_stats) + { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } for (;;) { enum buf_io_fix io_fix; @@ -1807,6 +1855,12 @@ got_block: break; } } + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -2062,6 +2116,11 @@ buf_page_get_gen( ulint fix_type; ibool must_read; mutex_t* block_mutex; + trx_t* trx = NULL; + ulint sec; + ulint ms; + ib_uint64_t start_time; + ib_uint64_t finish_time; ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) @@ -2075,6 +2134,9 @@ buf_page_get_gen( #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL)); #endif + if
(innobase_get_slow_log()) { + trx = innobase_get_trx(); + } buf_pool->n_page_gets++; loop: block = guess; @@ -2082,7 +2144,6 @@ loop: if (block) { block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); - ut_a(block_mutex); /* If the guess is a compressed page descriptor that has been allocated by buf_buddy_alloc(), it may have @@ -2092,7 +2153,9 @@ loop: the guess may be pointing to a buffer pool chunk that has been released when resizing the buffer pool. */ - if (!buf_block_is_uncompressed(block) + if (!block_mutex) { + block = guess = NULL; + } else if (!buf_block_is_uncompressed(block) || offset != block->page.offset || space != block->page.space || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { @@ -2127,7 +2190,7 @@ loop2: return(NULL); } - buf_read_page(space, zip_size, offset); + buf_read_page(space, zip_size, offset, trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 37 || buf_validate()); @@ -2379,6 +2442,13 @@ wait_until_unfixed: /* Let us wait until the read operation completes */ + if (innobase_get_slow_log() && trx && trx->take_stats) + { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } for (;;) { enum buf_io_fix io_fix; @@ -2393,6 +2463,12 @@ wait_until_unfixed: break; } } + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } } fix_type = MTR_MEMO_BUF_FIX; @@ -2418,13 +2494,17 @@ wait_until_unfixed: /* In the case of a first access, try to apply linear read-ahead */ - buf_read_ahead_linear(space, zip_size, offset); + buf_read_ahead_linear(space, zip_size, offset, trx); } #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(buf_block_get_space(block), buf_block_get_page_no(block)) == 0); #endif + if (innobase_get_slow_log()) { + _increment_page_get_statistics(block, trx); + } + return(block); } @@ 
-2447,6 +2527,7 @@ buf_page_optimistic_get_func( ibool accessed; ibool success; ulint fix_type; + trx_t* trx = NULL; ut_ad(mtr && block); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); @@ -2520,13 +2601,17 @@ buf_page_optimistic_get_func( #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(block->page.file_page_was_freed == FALSE); #endif + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + } + if (UNIV_UNLIKELY(!accessed)) { /* In the case of a first access, try to apply linear read-ahead */ buf_read_ahead_linear(buf_block_get_space(block), buf_block_get_zip_size(block), - buf_block_get_page_no(block)); + buf_block_get_page_no(block), trx); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -2535,6 +2620,9 @@ buf_page_optimistic_get_func( #endif buf_pool->n_page_gets++; + if (innobase_get_slow_log()) { + _increment_page_get_statistics(block, trx); + } return(TRUE); } @@ -2556,6 +2644,7 @@ buf_page_get_known_nowait( { ibool success; ulint fix_type; + trx_t* trx = NULL; ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); @@ -2623,6 +2712,11 @@ buf_page_get_known_nowait( #endif buf_pool->n_page_gets++; + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + _increment_page_get_statistics(block, trx); + } + return(TRUE); } diff --git a/buf/buf0flu.c b/buf/buf0flu.c index d465483691a..6c25f0f55ae 100644 --- a/buf/buf0flu.c +++ b/buf/buf0flu.c @@ -994,9 +994,7 @@ buf_flush_try_neighbors( || buf_page_is_old(bpage)) { mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - ut_a(block_mutex); - - if (buf_flush_ready_for_flush(bpage, flush_type) + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type) && (i == offset || !bpage->buf_fix_count)) { /* We only try to flush those neighbors != offset where the buf fix count is @@ -1012,7 +1010,7 @@ buf_flush_try_neighbors( //buf_pool_mutex_enter(); rw_lock_s_lock(&page_hash_latch); - } else { + } else if (block_mutex) { mutex_exit(block_mutex); } } @@ -1123,11 +1121,14 @@ flush_next: 
mutex_t*block_mutex = buf_page_get_mutex_enter(bpage); ibool ready; - ut_a(buf_page_in_file(bpage)); + //ut_a(buf_page_in_file(bpage)); - ut_a(block_mutex); - ready = buf_flush_ready_for_flush(bpage, flush_type); - mutex_exit(block_mutex); + if (block_mutex) { + ready = buf_flush_ready_for_flush(bpage, flush_type); + mutex_exit(block_mutex); + } else { + ready = FALSE; + } if (ready) { space = buf_page_get_space(bpage); @@ -1271,13 +1272,13 @@ buf_flush_LRU_recommendation(void) } block_mutex = buf_page_get_mutex_enter(bpage); - ut_a(block_mutex); - - if (buf_flush_ready_for_replace(bpage)) { + if (block_mutex && buf_flush_ready_for_replace(bpage)) { n_replaceable++; } - mutex_exit(block_mutex); + if (block_mutex) { + mutex_exit(block_mutex); + } distance++; diff --git a/buf/buf0lru.c b/buf/buf0lru.c index 2270ea5dce2..caddb51c983 100644 --- a/buf/buf0lru.c +++ b/buf/buf0lru.c @@ -255,9 +255,12 @@ scan_again: mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); buf_page_t* prev_bpage; - ut_a(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + if (!block_mutex) { + goto next_page; + } + ut_a(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE @@ -360,9 +363,13 @@ scan_again: ut_a(buf_page_in_file(bpage)); - ut_a(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + if (!block_mutex) { + bpage = prev_bpage; + continue; + } + if (buf_page_get_space(bpage) == id) { if (bpage->buf_fix_count > 0 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { @@ -634,7 +641,9 @@ restart: mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - ut_a(block_mutex); + if (!block_mutex) { + goto restart; + } if (!bpage->in_LRU_list || !buf_page_in_file(bpage)) { diff --git a/buf/buf0rea.c b/buf/buf0rea.c index f2dbe939c92..88ee5eb7431 100644 --- a/buf/buf0rea.c +++ b/buf/buf0rea.c @@ -82,7 +82,8 @@ buf_read_page_low( treat the tablespace as dropped; this is a timestamp we use to stop dangling page reads from a tablespace which we have DISCARDed + 
IMPORTed back */ - ulint offset) /*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx) { buf_page_t* bpage; ulint wake_later; @@ -183,15 +184,15 @@ not_to_recover: ut_ad(buf_page_in_file(bpage)); if (zip_size) { - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage); + bpage->zip.data, bpage, trx); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage); + ((buf_block_t*) bpage)->frame, bpage, trx); } ut_a(*err == DB_SUCCESS); @@ -223,8 +224,9 @@ buf_read_ahead_random( /*==================*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset) /*!< in: page number of a page which the current thread + ulint offset, /*!< in: page number of a page which the current thread wants to access */ + trx_t* trx) { ib_int64_t tablespace_version; ulint recent_blocks = 0; @@ -340,7 +342,7 @@ read_ahead: &err, FALSE, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, FALSE, - tablespace_version, i); + tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -387,7 +389,8 @@ buf_read_page( /*==========*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset) /*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx) { ib_int64_t tablespace_version; ulint count; @@ -396,14 +399,14 @@ buf_read_page( tablespace_version = fil_space_get_version(space); - count = buf_read_ahead_random(space, zip_size, offset); + count = buf_read_ahead_random(space, zip_size, offset, trx); /* We do the i/o in the synchronous aio mode to save thread switches: hence TRUE */ count2 = buf_read_page_low(&err, TRUE, 
BUF_READ_ANY_PAGE, space, zip_size, FALSE, - tablespace_version, offset); + tablespace_version, offset, trx); srv_buf_pool_reads+= count2; if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); @@ -454,8 +457,9 @@ buf_read_ahead_linear( /*==================*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset) /*!< in: page number of a page; NOTE: the current thread + ulint offset, /*!< in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx) { ib_int64_t tablespace_version; buf_page_t* bpage; @@ -670,7 +674,7 @@ buf_read_ahead_linear( count += buf_read_page_low( &err, FALSE, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, FALSE, tablespace_version, i); + space, zip_size, FALSE, tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -760,7 +764,7 @@ buf_read_ibuf_merge_pages( buf_read_page_low(&err, sync && (i + 1 == n_stored), BUF_READ_ANY_PAGE, space_ids[i], zip_size, TRUE, space_versions[i], - page_nos[i]); + page_nos[i], NULL); if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) { tablespace_deleted: @@ -857,12 +861,12 @@ buf_read_recv_pages( if ((i + 1 == n_stored) && sync) { buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, zip_size, TRUE, tablespace_version, - page_nos[i]); + page_nos[i], NULL); } else { buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, TRUE, - tablespace_version, page_nos[i]); + tablespace_version, page_nos[i], NULL); } } diff --git a/dict/dict0dict.c b/dict/dict0dict.c index 1a28f354a67..050182bc831 100644 --- a/dict/dict0dict.c +++ b/dict/dict0dict.c @@ -1231,7 +1231,7 @@ dict_col_name_is_reserved( ulint i; for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) { - if (strcmp(name, reserved_names[i]) == 0) { + if (innobase_strcasecmp(name, reserved_names[i]) == 0) { return(TRUE); } diff --git 
a/fil/fil0fil.c b/fil/fil0fil.c index 31fa257e51c..e06dc1906c6 100644 --- a/fil/fil0fil.c +++ b/fil/fil0fil.c @@ -4161,7 +4161,7 @@ fil_extend_space_to_desired_size( node->name, node->handle, buf, offset_low, offset_high, page_size * n_pages, - NULL, NULL); + NULL, NULL, NULL); #endif if (success) { node->size += n_pages; @@ -4488,7 +4488,7 @@ Reads or writes data. This operation is asynchronous (aio). i/o on a tablespace which does not exist */ UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, ORed to OS_FILE_LOG, if a log i/o @@ -4513,8 +4513,9 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx) { ulint mode; fil_space_t* space; @@ -4684,7 +4685,7 @@ fil_io( #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message); + offset_low, offset_high, len, node, message, trx); #endif ut_a(ret); diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc index 2d05f4936a6..c2f2113d571 100644 --- a/handler/ha_innodb.cc +++ b/handler/ha_innodb.cc @@ -206,7 +206,6 @@ static my_bool innobase_use_doublewrite = TRUE; static my_bool innobase_use_checksums = TRUE; static my_bool innobase_extra_undoslots = FALSE; static my_bool innobase_fast_recovery = FALSE; -static my_bool innobase_use_purge_thread = FALSE; static my_bool innobase_locks_unsafe_for_binlog = FALSE; static my_bool innobase_overwrite_relay_log_info = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; @@ -300,8 +299,27 @@ innobase_alter_table_flags( /*=======================*/ uint flags); +/*********************************************************************** +This function checks each index name for a table against reserved 
+system default primary index name 'GEN_CLUST_INDEX'. If a name matches, +this function pushes an error message to the client, and returns true. */ +static +bool +innobase_index_name_is_reserved( +/*============================*/ + /* out: true if index name matches a + reserved name */ + const trx_t* trx, /* in: InnoDB transaction handle */ + const TABLE* form, /* in: information on table + columns and indexes */ + const char* norm_name); /* in: table name */ + static const char innobase_hton_name[]= "InnoDB"; +/* "GEN_CLUST_INDEX" is the name reserved for Innodb default +system primary index. */ +static const char innobase_index_reserve_name[]= "GEN_CLUST_INDEX"; + /*************************************************************//** Check for a valid value of innobase_commit_concurrency. @return 0 for valid innodb_commit_concurrency */ @@ -895,17 +913,8 @@ convert_error_code_to_mysql( return(ER_PRIMARY_CANT_HAVE_NULL); case DB_TOO_MANY_CONCURRENT_TRXS: - /* Once MySQL add the appropriate code to errmsg.txt then - we can get rid of this #ifdef. NOTE: The code checked by - the #ifdef is the suggested name for the error condition - and the actual error code name could very well be different. - This will require some monitoring, ie. the status - of this request on our part.*/ -#ifdef ER_TOO_MANY_CONCURRENT_TRXS - return(ER_TOO_MANY_CONCURRENT_TRXS); -#else - return(HA_ERR_RECORD_FILE_FULL); -#endif + return(HA_ERR_TOO_MANY_CONCURRENT_TRXS); + case DB_UNSUPPORTED: return(HA_ERR_UNSUPPORTED); } @@ -979,7 +988,22 @@ innobase_get_cset_width( *mbminlen = cs->mbminlen; *mbmaxlen = cs->mbmaxlen; } else { + if (current_thd + && (thd_sql_command(current_thd) == SQLCOM_DROP_TABLE)) { + + /* Fix bug#46256: allow tables to be dropped if the + collation is not found, but issue a warning. 
*/ + if ((global_system_variables.log_warnings) + && (cset != 0)){ + + sql_print_warning( + "Unknown collation #%lu.", cset); + } + } else { + ut_a(cset == 0); + } + *mbminlen = *mbmaxlen = 0; } } @@ -1053,6 +1077,7 @@ innobase_get_charset( } #if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) +extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list; /*******************************************************************//** Map an OS error to an errno value. The OS error number is stored in _doserrno and the mapped value is stored in errno) */ @@ -1340,6 +1365,16 @@ innobase_trx_init( trx->check_unique_secondary = !thd_test_options( thd, OPTION_RELAXED_UNIQUE_CHECKS); +#ifdef EXTENDED_SLOWLOG + if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) { + trx->take_stats = TRUE; + } else { + trx->take_stats = FALSE; + } +#else + trx->take_stats = FALSE; +#endif + DBUG_VOID_RETURN; } @@ -1396,6 +1431,32 @@ check_trx_exists( } +/************************************************************************* +Gets current trx. */ +extern "C" +trx_t* +innobase_get_trx() +{ + THD *thd=current_thd; + if (likely(thd != 0)) { + trx_t*& trx = thd_to_trx(thd); + return(trx); + } else { + return(NULL); + } +} + +extern "C" +ibool +innobase_get_slow_log() +{ +#ifdef EXTENDED_SLOWLOG + return((ibool) thd_opt_slow_log()); +#else + return(FALSE); +#endif +} + /*********************************************************************//** Construct ha_innobase handler. */ UNIV_INTERN @@ -2294,8 +2355,6 @@ innobase_change_buffering_inited_ok: srv_fast_recovery = (ibool) innobase_fast_recovery; - srv_use_purge_thread = (ibool) innobase_use_purge_thread; - srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; srv_use_checksums = (ibool) innobase_use_checksums; @@ -5871,6 +5930,28 @@ create_table_def( } } + /* First check whether the column to be added has a + system reserved name. 
*/ + if (dict_col_name_is_reserved(field->field_name)){ + push_warning_printf( + (THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_CANT_CREATE_TABLE, + "Error creating table '%s' with " + "column name '%s'. '%s' is a " + "reserved name. Please try to " + "re-create the table with a " + "different column name.", + table->name, (char*) field->field_name, + (char*) field->field_name); + + dict_mem_table_free(table); + trx_commit_for_mysql(trx); + + error = DB_ERROR; + goto error_ret; + } + dict_mem_table_add_col(table, table->heap, (char*) field->field_name, col_type, @@ -5884,6 +5965,7 @@ create_table_def( error = row_create_table_for_mysql(table, trx); +error_ret: error = convert_error_code_to_mysql(error, flags, NULL); DBUG_RETURN(error); @@ -5922,6 +6004,9 @@ create_index( n_fields = key->key_parts; + /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */ + ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0); + ind_type = 0; if (key_num == form->s->primary_key) { @@ -6030,8 +6115,8 @@ create_clustered_index_when_no_primary( /* We pass 0 as the space id, and determine at a lower level the space id where to store the table */ - - index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX", + index = dict_mem_index_create(table_name, + innobase_index_reserve_name, 0, DICT_CLUSTERED, 0); error = row_create_index_for_mysql(index, trx, NULL); @@ -6457,14 +6542,6 @@ ha_innobase::create( flags = DICT_TF_COMPACT; } - error = create_table_def(trx, form, norm_name, - create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL, - flags); - - if (error) { - goto cleanup; - } - /* Look for a primary key */ primary_key_no= (form->s->primary_key != MAX_KEY ? @@ -6476,6 +6553,22 @@ ha_innobase::create( ut_a(primary_key_no == -1 || primary_key_no == 0); + /* Check for name conflicts (with reserved name) for + any user indices to be created. 
*/ + if (innobase_index_name_is_reserved(trx, form, norm_name)) { + error = -1; + goto cleanup; + } + + error = create_table_def(trx, form, norm_name, + create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL, + flags); + + if (error) { + goto cleanup; + } + + /* Create the keys */ if (form->s->keys == 0 || primary_key_no == -1) { @@ -8127,6 +8220,23 @@ ha_innobase::external_lock( statement has ended */ if (trx->n_mysql_tables_in_use == 0) { +#ifdef EXTENDED_SLOWLOG + increment_thd_innodb_stats(thd, trx->io_reads, + trx->io_read, + trx->io_reads_wait_timer, + trx->lock_que_wait_timer, + trx->innodb_que_wait_timer, + trx->distinct_page_access); + + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + if (trx->distinct_page_access_hash) + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); +#endif trx->mysql_n_tables_locked = 0; prebuilt->used_in_HANDLER = FALSE; @@ -8414,8 +8524,8 @@ innodb_mutex_show_status( rw_lock_wait_time += mutex->lspent_time; } #else /* UNIV_DEBUG */ - buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu", - mutex->cfile_name, (ulong) mutex->cline); + buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s", + mutex->cmutex_name); buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu", mutex->count_os_wait); @@ -8440,8 +8550,8 @@ next_mutex: while (lock != NULL) { if (lock->count_os_wait && !buf_pool_is_block_lock(lock)) { - buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu", - lock->cfile_name, (ulong) lock->cline); + buf1len= my_snprintf(buf1, sizeof(buf1), "%s", + lock->lock_name); buf2len= my_snprintf(buf2, sizeof(buf2), "os_waits=%lu", lock->count_os_wait); @@ -9905,6 +10015,46 @@ static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) return 0; } +/*********************************************************************** +This function checks each index name for a table against reserved +system 
default primary index name 'GEN_CLUST_INDEX'. If a name matches, +this function pushes an error message to the client, and returns true. */ +static +bool +innobase_index_name_is_reserved( +/*============================*/ + /* out: true if an index name + matches the reserved name */ + const trx_t* trx, /* in: InnoDB transaction handle */ + const TABLE* form, /* in: information on table + columns and indexes */ + const char* norm_name) /* in: table name */ +{ + KEY* key; + uint key_num; /* index number */ + + for (key_num = 0; key_num < form->s->keys; key_num++) { + key = form->key_info + key_num; + + if (innobase_strcasecmp(key->name, + innobase_index_reserve_name) == 0) { + /* Push warning to mysql */ + push_warning_printf((THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_CANT_CREATE_TABLE, + "Cannot Create Index with name " + "'%s'. The name is reserved " + "for the system default primary " + "index.", + innobase_index_reserve_name); + + return(true); + } + } + + return(false); +} + static SHOW_VAR innodb_status_variables_export[]= { {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} @@ -9937,10 +10087,10 @@ static MYSQL_SYSVAR_BOOL(fast_recovery, innobase_fast_recovery, "Enable to use speed hack of recovery avoiding flush list sorting.", NULL, NULL, FALSE); -static MYSQL_SYSVAR_BOOL(use_purge_thread, innobase_use_purge_thread, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable to use purge devoted thread.", - NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONG(use_purge_thread, srv_use_purge_thread, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Number of purge devoted threads. 
#### over 1 is EXPERIMENTAL ####", + NULL, NULL, 0, 0, 64, 0); static MYSQL_SYSVAR_BOOL(overwrite_relay_log_info, innobase_overwrite_relay_log_info, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, @@ -10451,6 +10601,7 @@ i_s_innodb_cmpmem, i_s_innodb_cmpmem_reset, i_s_innodb_table_stats, i_s_innodb_index_stats, +i_s_innodb_admin_command, i_s_innodb_patches mysql_declare_plugin_end; diff --git a/handler/handler0alter.cc b/handler/handler0alter.cc index 12f6099eeb3..c85e61d307b 100644 --- a/handler/handler0alter.cc +++ b/handler/handler0alter.cc @@ -670,6 +670,23 @@ err_exit: DBUG_RETURN(error); } + /* Check for name conflicts (with reserved name) for + any user indices to be created. */ + if (innobase_strcasecmp(key_info->name, + "GEN_CLUST_INDEX") == 0) { + /* Push warning to mysql */ + push_warning_printf((THD*) trx->mysql_thd, + MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_CANT_CREATE_TABLE, + "Cannot Create Index with name " + "'%s'. The name is reserved " + "for the system default primary " + "index.", + "GEN_CLUST_INDEX"); + error = ER_CANT_CREATE_TABLE; + goto err_exit; + } + /* Create table containing all indexes to be built in this alter table add index so that they are in the correct order in the table. 
*/ diff --git a/handler/i_s.cc b/handler/i_s.cc index 1a9e304c33d..4b61971f8b0 100644 --- a/handler/i_s.cc +++ b/handler/i_s.cc @@ -2953,3 +2953,140 @@ UNIV_INTERN struct st_mysql_plugin i_s_innodb_index_stats = STRUCT_FLD(system_vars, NULL), STRUCT_FLD(__reserved1, NULL) }; + +/*********************************************************************** +*/ +static ST_FIELD_INFO i_s_innodb_admin_command_info[] = +{ + {STRUCT_FLD(field_name, "result_message"), + STRUCT_FLD(field_length, 1024), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +#ifndef INNODB_COMPATIBILITY_HOOKS +#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS +#endif + +extern "C" { +char **thd_query(MYSQL_THD thd); +} + +static +int +i_s_innodb_admin_command_fill( +/*==========================*/ + THD* thd, + TABLE_LIST* tables, + COND* cond) +{ /* scans the statement text for an XTRA_ command token outside back-quote / double-quote quoting and stores a one-row result message; returns 1 only if storing the row fails */ + TABLE* i_s_table = (TABLE *) tables->table; + CHARSET_INFO *cs= system_charset_info; + char** query_str; + char* ptr; + char quote = '\0'; + const char* command_head = "XTRA_"; + + DBUG_ENTER("i_s_innodb_admin_command_fill"); + + /* deny access to users without the PROCESS privilege */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + if(thd_sql_command(thd) != SQLCOM_SELECT) { + field_store_string(i_s_table->field[0], + "SELECT command is only accepted."); + goto end_func; + } + + query_str = thd_query(thd); + ptr = *query_str; + + for (; *ptr; ptr++) { + if (*ptr == quote) { + quote = '\0'; + } else if (quote) { + } else if (*ptr == '`' || *ptr == '"') { + quote = *ptr; + } else { + long i; + for (i = 0; command_head[i]; i++) { + if (toupper((int)(unsigned char)(ptr[i])) + != toupper((int)(unsigned char) + (command_head[i]))) { + goto nomatch; + } + } + break; +nomatch: + ; + } + } + + if (!*ptr) { + field_store_string(i_s_table->field[0], + "No XTRA_* command in the SQL
statement." + " Please add /*!XTRA_xxxx*/ to the SQL."); + goto end_func; + } + + if (!strncasecmp("XTRA_HELLO", ptr, 10)) { + /* This is example command XTRA_HELLO */ + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: administration command test for XtraDB" + " 'XTRA_HELLO' was detected.\n"); + + field_store_string(i_s_table->field[0], + "Hello!"); + goto end_func; + } + + field_store_string(i_s_table->field[0], + "Undefined XTRA_* command."); + goto end_func; + +end_func: + if (schema_table_store_record(thd, i_s_table)) { + DBUG_RETURN(1); + } else { + DBUG_RETURN(0); + } +} + +static +int +i_s_innodb_admin_command_init( +/*==========================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_admin_command_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_admin_command_info; + schema->fill_table = i_s_innodb_admin_command_fill; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_admin_command = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "XtraDB specific command acceptor"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_admin_command_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; diff --git a/handler/i_s.h b/handler/i_s.h index 7bb03460285..6d0f426acc3 100644 --- a/handler/i_s.h +++ b/handler/i_s.h @@ -40,5 +40,6 @@ extern struct st_mysql_plugin i_s_innodb_patches; extern struct st_mysql_plugin i_s_innodb_rseg; extern struct st_mysql_plugin i_s_innodb_table_stats; extern struct st_mysql_plugin i_s_innodb_index_stats; +extern struct st_mysql_plugin i_s_innodb_admin_command; #endif /* i_s_h */ diff --git a/handler/innodb_patch_info.h b/handler/innodb_patch_info.h index
f8a728d0d27..184fd65af63 100644 --- a/handler/innodb_patch_info.h +++ b/handler/innodb_patch_info.h @@ -38,5 +38,8 @@ struct innodb_enhancement { {"innodb_stats","Additional features about InnoDB statistics/optimizer","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_recovery_patches","Bugfixes and adjustments about recovery process","","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_purge_thread","Enable to use purge devoted thread","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"}, {NULL, NULL, NULL, NULL} }; diff --git a/include/buf0buf.ic b/include/buf0buf.ic index 9dc104b25b5..014b69a5dc2 100644 --- a/include/buf0buf.ic +++ b/include/buf0buf.ic @@ -825,15 +825,15 @@ buf_page_get_newest_modification( ib_uint64_t lsn; mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - ut_a(block_mutex); - - if (buf_page_in_file(bpage)) { + if (block_mutex && buf_page_in_file(bpage)) { lsn = bpage->newest_modification; } else { lsn = 0; } - mutex_exit(block_mutex); + if (block_mutex) { + mutex_exit(block_mutex); + } return(lsn); } diff --git a/include/buf0rea.h b/include/buf0rea.h index b4d25e6fde0..e19f50124e5 100644 --- a/include/buf0rea.h +++ b/include/buf0rea.h @@ -27,6 +27,7 @@ Created 11/5/1995 Heikki Tuuri #define buf0rea_h #include "univ.i" +#include "trx0types.h" #include "buf0types.h" /********************************************************************//** @@ -43,7 +44,8 @@ buf_read_page( /*==========*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint 
offset);/*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx); /********************************************************************//** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. @@ -74,8 +76,9 @@ buf_read_ahead_linear( /*==================*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset);/*!< in: page number of a page; NOTE: the current thread + ulint offset, /*!< in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx); /********************************************************************//** Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like diff --git a/include/fil0fil.h b/include/fil0fil.h index 0470d533dec..edf10b1dd5c 100644 --- a/include/fil0fil.h +++ b/include/fil0fil.h @@ -614,9 +614,12 @@ fil_space_get_n_reserved_extents( Reads or writes data. This operation is asynchronous (aio). 
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) + UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, ORed to OS_FILE_LOG, if a log i/o @@ -641,8 +644,9 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message); /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided diff --git a/include/os0file.h b/include/os0file.h index d8d2f0e5d9e..4dda4476d9e 100644 --- a/include/os0file.h +++ b/include/os0file.h @@ -53,6 +53,7 @@ Created 10/21/1995 Heikki Tuuri #define os0file_h #include "univ.i" +#include "trx0types.h" #ifndef __WIN__ #include @@ -497,9 +498,12 @@ os_file_get_last_error( /*******************************************************************//** Requests a synchronous read operation. 
@return TRUE if request was successful, FALSE if fail */ +#define os_file_read(file, buf, offset, offset_high, n) \ + _os_file_read(file, buf, offset, offset_high, n, NULL) + UNIV_INTERN ibool -os_file_read( +_os_file_read( /*=========*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ @@ -507,7 +511,8 @@ os_file_read( offset where to read */ ulint offset_high,/*!< in: most significant 32 bits of offset */ - ulint n); /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + trx_t* trx); /*******************************************************************//** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. This function is @@ -654,10 +659,11 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2);/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx); /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. 
*/ diff --git a/include/srv0srv.h b/include/srv0srv.h index ac409e1093d..4700ca11f7a 100644 --- a/include/srv0srv.h +++ b/include/srv0srv.h @@ -80,6 +80,9 @@ at a time */ #define SRV_AUTO_EXTEND_INCREMENT \ (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) +/* prototypes for new functions added to ha_innodb.cc */ +ibool innobase_get_slow_log(); + /* This is set to TRUE if the MySQL user has set it in MySQL */ extern ibool srv_lower_case_table_names; @@ -134,7 +137,7 @@ extern ibool srv_extra_undoslots; extern ibool srv_fast_recovery; -extern ibool srv_use_purge_thread; +extern ulint srv_use_purge_thread; extern ibool srv_auto_extend_last_data_file; extern ulint srv_last_file_size_max; @@ -428,6 +431,7 @@ enum srv_thread_type { SRV_INSERT, /**< thread flushing the insert buffer to disk */ #endif SRV_PURGE, /* thread purging undo records */ + SRV_PURGE_WORKER, /* worker thread running purge worker batches (srv_purge_worker_thread) */ SRV_MASTER /**< the master thread, (whose type number must be biggest) */ }; @@ -509,6 +513,13 @@ srv_purge_thread( /*=============*/ void* arg); /* in: a dummy parameter required by os_thread_create */ +/************************************************************************* +The undo purge worker thread. */ +UNIV_INTERN +os_thread_ret_t +srv_purge_worker_thread( +/*====================*/ + void* arg); /* in: pointer to the ulint index of this worker */ /*******************************************************************//** Tells the Innobase server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used diff --git a/include/sync0rw.h b/include/sync0rw.h index aedfd5f3f86..85fa014d77a 100644 --- a/include/sync0rw.h +++ b/include/sync0rw.h @@ -120,7 +120,7 @@ is necessary only if the memory block containing it is freed. 
*/ # endif /* UNIV_SYNC_DEBUG */ #else /* UNIV_DEBUG */ # define rw_lock_create(L, level) \ - rw_lock_create_func((L), __FILE__, __LINE__) + rw_lock_create_func((L), #L, NULL, 0) #endif /* UNIV_DEBUG */ /******************************************************************//** @@ -137,8 +137,8 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ #endif /* UNIV_DEBUG */ + const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ ulint cline); /*!< in: file line where created */ /******************************************************************//** @@ -540,7 +540,8 @@ struct rw_lock_struct { ulint level; /*!< Level in the global latching order. */ #endif /* UNIV_SYNC_DEBUG */ ulint count_os_wait; /*!< Count of os_waits. May not be accurate */ - const char* cfile_name;/*!< File name where lock created */ + //const char* cfile_name;/*!< File name where lock created */ + const char* lock_name;/*!< lock name */ /* last s-lock file/line is not guaranteed to be correct */ const char* last_s_file_name;/*!< File name where last s-locked */ const char* last_x_file_name;/*!< File name where last x-locked */ @@ -551,7 +552,7 @@ struct rw_lock_struct { are at the start of this struct, thus we can peek this field without causing much memory bus traffic */ - unsigned cline:14; /*!< Line where created */ + //unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ ulint magic_n; /*!< RW_LOCK_MAGIC_N */ diff --git a/include/sync0sync.h b/include/sync0sync.h index 966bcced722..a6697638081 100644 --- a/include/sync0sync.h +++ b/include/sync0sync.h @@ -80,7 +80,7 @@ necessary only if the memory block containing it is freed. 
*/ # endif #else # define mutex_create(M, level) \ - mutex_create_func((M), __FILE__, __LINE__) + mutex_create_func((M), #M, NULL, 0) #endif /******************************************************************//** @@ -93,8 +93,8 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG const char* cmutex_name, /*!< in: mutex name */ +#ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ @@ -524,9 +524,9 @@ struct mutex_struct { ulint line; /*!< Line where the mutex was locked */ ulint level; /*!< Level in the global latching order */ #endif /* UNIV_SYNC_DEBUG */ +#ifdef UNIV_DEBUG const char* cfile_name;/*!< File name where mutex created */ ulint cline; /*!< Line where created */ -#ifdef UNIV_DEBUG os_thread_id_t thread_id; /*!< The thread id of the thread which locked the mutex. */ ulint magic_n; /*!< MUTEX_MAGIC_N */ @@ -541,9 +541,9 @@ struct mutex_struct { ulong count_os_yield; /*!< count of os_wait */ ulonglong lspent_time; /*!< mutex os_wait timer msec */ ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */ - const char* cmutex_name; /*!< mutex name */ ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */ #endif /* UNIV_DEBUG */ + const char* cmutex_name; /*!< mutex name */ }; /** The global array of wait cells for implementation of the databases own diff --git a/include/trx0purge.h b/include/trx0purge.h index 7812ad7eb92..d9a470f4036 100644 --- a/include/trx0purge.h +++ b/include/trx0purge.h @@ -108,6 +108,25 @@ UNIV_INTERN ulint trx_purge(void); /*===========*/ +/********************************************************************** +This function runs a purge worker batch */ +UNIV_INTERN +void +trx_purge_worker( +/*=============*/ + ulint worker_id); +/********************************************************************** +This function waits the event for worker batch */ +UNIV_INTERN +void +trx_purge_worker_wait(void); 
+/*========================*/ +/********************************************************************** +This function wakes the waiting worker batch */ +UNIV_INTERN +void +trx_purge_worker_wake(void); +/*========================*/ /******************************************************************//** Prints information of the purge system to stderr. */ UNIV_INTERN @@ -125,6 +144,11 @@ struct trx_purge_struct{ of the trx system and it never ends */ que_t* query; /*!< The query graph which will do the parallelized purge operation */ + ulint n_worker; + os_event_t worker_event; + sess_t** sess_arr; + trx_t** trx_arr; + que_t** query_arr; rw_lock_t latch; /*!< The latch protecting the purge view. A purge operation must acquire an x-latch here for the instant at which diff --git a/include/trx0trx.h b/include/trx0trx.h index da031d9d5d3..2d8987284e5 100644 --- a/include/trx0trx.h +++ b/include/trx0trx.h @@ -729,6 +729,17 @@ struct trx_struct{ /*------------------------------*/ char detailed_error[256]; /*!< detailed error message for last error, or empty. 
*/ + /*------------------------------*/ + ulint io_reads; + ib_uint64_t io_read; + ulint io_reads_wait_timer; + ib_uint64_t lock_que_wait_ustarted; + ulint lock_que_wait_timer; + ulint innodb_que_wait_timer; + ulint distinct_page_access; +#define DPAH_SIZE 8192 + byte* distinct_page_access_hash; + ibool take_stats; }; #define TRX_MAX_N_THREADS 32 /* maximum number of diff --git a/lock/lock0lock.c b/lock/lock0lock.c index f76c27e093c..ea414676b73 100644 --- a/lock/lock0lock.c +++ b/lock/lock0lock.c @@ -1739,6 +1739,8 @@ lock_rec_enqueue_waiting( { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -1797,6 +1799,10 @@ lock_rec_enqueue_waiting( trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; + } ut_a(que_thr_stop(thr)); @@ -3607,6 +3613,8 @@ lock_table_enqueue_waiting( { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -3660,6 +3668,10 @@ lock_table_enqueue_waiting( return(DB_SUCCESS); } + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; + } trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); diff --git a/log/log0log.c b/log/log0log.c index 3a89f540b77..09956bff215 100644 --- a/log/log0log.c +++ b/log/log0log.c @@ -3120,6 +3120,16 @@ loop: goto loop; } + /* Check that the purge threads ended */ + if (srv_use_purge_thread + && (srv_n_threads_active[SRV_PURGE] != 0 + || srv_n_threads_active[SRV_PURGE_WORKER] != 0)) { + + mutex_exit(&kernel_mutex); + + goto loop; + } + mutex_exit(&kernel_mutex); mutex_enter(&(log_sys->mutex)); diff --git a/mysql-test/innodb-index.result b/mysql-test/innodb-index.result index 8772d8354e6..f2c90c54df8 100644 --- 
a/mysql-test/innodb-index.result +++ b/mysql-test/innodb-index.result @@ -1125,6 +1125,7 @@ t2 CREATE TABLE `t2` ( ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE t2; DROP TABLE t1; +call mtr.add_suppression("InnoDB: insufficient history for index"); CREATE TABLE t1 (a INT, b CHAR(1)) ENGINE=InnoDB; INSERT INTO t1 VALUES (3,'a'),(3,'b'),(1,'c'),(0,'d'),(1,'e'); BEGIN; diff --git a/mysql-test/innodb-index.test b/mysql-test/innodb-index.test index 59f1e5d3d2b..1ec1a503c63 100644 --- a/mysql-test/innodb-index.test +++ b/mysql-test/innodb-index.test @@ -499,6 +499,8 @@ SHOW CREATE TABLE t2; DROP TABLE t2; DROP TABLE t1; +call mtr.add_suppression("InnoDB: insufficient history for index"); + connect (a,localhost,root,,); connect (b,localhost,root,,); connection a; diff --git a/mysql-test/innodb_file_format.result b/mysql-test/innodb_file_format.result index 45285542936..fbc67ada1bb 100644 --- a/mysql-test/innodb_file_format.result +++ b/mysql-test/innodb_file_format.result @@ -1,4 +1,4 @@ -set @old_innodb_file_format=@@innodb_file_format; +call mtr.add_suppression("InnoDB: invalid innodb_file_format_check value"); select @@innodb_file_format; @@innodb_file_format Antelope @@ -43,5 +43,3 @@ ERROR HY000: Incorrect arguments to SET select @@innodb_file_format_check; @@innodb_file_format_check Barracuda -set global innodb_file_format=@old_innodb_file_format; -set global innodb_file_format_check=Antelope; diff --git a/mysql-test/innodb_file_format.test b/mysql-test/innodb_file_format.test index 62f8468a818..3bd1dd2fa6f 100644 --- a/mysql-test/innodb_file_format.test +++ b/mysql-test/innodb_file_format.test @@ -1,5 +1,9 @@ -- source include/have_innodb.inc -set @old_innodb_file_format=@@innodb_file_format; + +call mtr.add_suppression("InnoDB: invalid innodb_file_format_check value"); + +let $format=`select @@innodb_file_format`; +let $innodb_file_format_check_orig=`select @@innodb_file_format_check`; select @@innodb_file_format; select @@innodb_file_format_check; @@ -28,5 
+32,11 @@ set global innodb_file_format=on; set global innodb_file_format=off; select @@innodb_file_format_check; -set global innodb_file_format=@old_innodb_file_format; -set global innodb_file_format_check=Antelope; +# +# restore environment to the state it was before this test execution +# + +-- disable_query_log +eval set global innodb_file_format=$format; +eval set global innodb_file_format_check=$innodb_file_format_check_orig; +-- enable_query_log diff --git a/mysql-test/innodb_xtradb_bug317074.result b/mysql-test/innodb_xtradb_bug317074.result index 52c758a5eed..82a98844652 100644 --- a/mysql-test/innodb_xtradb_bug317074.result +++ b/mysql-test/innodb_xtradb_bug317074.result @@ -1,5 +1,4 @@ SET @old_innodb_file_format=@@innodb_file_format; SET @old_innodb_file_per_table=@@innodb_file_per_table; -SET @old_innodb_file_format_check=@@innodb_file_format_check; SET GLOBAL innodb_file_format='Barracuda'; SET GLOBAL innodb_file_per_table=ON; diff --git a/mysql-test/innodb_xtradb_bug317074.test b/mysql-test/innodb_xtradb_bug317074.test index a15d64d6bb5..757ac87bf18 100644 --- a/mysql-test/innodb_xtradb_bug317074.test +++ b/mysql-test/innodb_xtradb_bug317074.test @@ -2,7 +2,7 @@ SET @old_innodb_file_format=@@innodb_file_format; SET @old_innodb_file_per_table=@@innodb_file_per_table; -SET @old_innodb_file_format_check=@@innodb_file_format_check; +let $innodb_file_format_check_orig=`select @@innodb_file_format_check`; SET GLOBAL innodb_file_format='Barracuda'; SET GLOBAL innodb_file_per_table=ON; @@ -41,4 +41,4 @@ ALTER TABLE test1 ENGINE=MyISAM; DROP TABLE test1; SET GLOBAL innodb_file_format=@old_innodb_file_format; SET GLOBAL innodb_file_per_table=@old_innodb_file_per_table; -SET GLOBAL innodb_file_format_check=@old_innodb_file_format_check; +eval set global innodb_file_format_check=$innodb_file_format_check_orig; diff --git a/mysql-test/patches/disabled.def.diff b/mysql-test/patches/disabled.def.diff index e2e20cd000a..b8b3ed2d02d 100644 --- 
a/mysql-test/patches/disabled.def.diff +++ b/mysql-test/patches/disabled.def.diff @@ -1,9 +1,8 @@ ---- mysql-test/t/disabled.def.orig 2009-09-21 20:12:29.000000000 +0000 -+++ mysql-test/t/disabled.def 2009-09-21 20:20:16.000000000 +0000 -@@ -13,4 +13,6 @@ - innodb_bug39438 : Bug#42383 2009-01-28 lsoares "This fails in embedded and on windows. Note that this test is not run on windows and on embedded in PB for main trees currently" +--- mysql-test/t/disabled.def.orig 2009-10-25 05:54:05.000000000 +0000 ++++ mysql-test/t/disabled.def 2009-10-25 05:54:25.000000000 +0000 +@@ -14,3 +14,5 @@ query_cache_28249 : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically - init_connect : Bug#44920 2009-07-06 pcrews MTR not processing master.opt input properly on Windows. *Must be done this way due to the nature of the bug* + partition_innodb_builtin : Bug#32430 2009-09-25 mattiasj Waiting for push of Innodb changes + partition_innodb_plugin : Bug#32430 2009-09-25 mattiasj Waiting for push of Innodb changes +read_many_rows_innodb : Bug#433409 2009-09-20 the test fails on 5.1.37 https://bugs.launchpad.net/bugs/433409 +innodb-zip : Bug#47495 2009-09-21 the test fails on ubuntu - diff --git a/mysql-test/patches/innodb_bug46000.diff b/mysql-test/patches/innodb_bug46000.diff new file mode 100644 index 00000000000..d57cbde387e --- /dev/null +++ b/mysql-test/patches/innodb_bug46000.diff @@ -0,0 +1,26 @@ +--- mysql-test/r/innodb_bug46000.result.orig 2009-10-07 03:21:43.000000000 +0900 ++++ mysql-test/r/innodb_bug46000.result 2009-10-28 14:08:55.000000000 +0900 +@@ -8,10 +8,10 @@ + Error 1005 Can't create table 'test.bug46000' (errno: -1) + create table bug46000(id int) engine=innodb; + create index GEN_CLUST_INDEX on bug46000(id); +-ERROR HY000: Can't create table '#sql-temporary' (errno: -1) ++ERROR HY000: Got error 1005 from storage engine + show errors; + Level Code Message + Error 1005 Cannot Create Index with name 'GEN_CLUST_INDEX'. 
The name is reserved for the system default primary index. +-Error 1005 Can't create table '#sql-temporary' (errno: -1) ++Error 1030 Got error 1005 from storage engine + create index idx on bug46000(id); + drop table bug46000; +--- mysql-test/t/innodb_bug46000.test.orig 2009-10-07 03:21:12.000000000 +0900 ++++ mysql-test/t/innodb_bug46000.test 2009-10-28 14:08:37.000000000 +0900 +@@ -20,7 +20,7 @@ + + # This 'create index' operation should fail. + --replace_regex /'[^']*test.#sql-[0-9a-f_]*'/'#sql-temporary'/ +---error ER_CANT_CREATE_TABLE ++--error ER_GET_ERRNO + create index GEN_CLUST_INDEX on bug46000(id); + + --replace_regex /'[^']*test.#sql-[0-9a-f_]*'/'#sql-temporary'/ diff --git a/os/os0file.c b/os/os0file.c index f961ea2adb2..980c44df82a 100644 --- a/os/os0file.c +++ b/os/os0file.c @@ -55,6 +55,7 @@ Created 10/21/1995 Heikki Tuuri #include "srv0start.h" #include "fil0fil.h" #include "buf0buf.h" +#include "trx0sys.h" #ifndef UNIV_HOTBACKUP # include "os0sync.h" # include "os0thread.h" @@ -2046,20 +2047,28 @@ os_file_flush( /*******************************************************************//** Does a synchronous read operation in Posix. 
@return number of bytes read, -1 if error */ +#define os_file_pread(file, buf, n, offset, offset_high) \ + _os_file_pread(file, buf, n, offset, offset_high, NULL) + static ssize_t -os_file_pread( +_os_file_pread( /*==========*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ ulint n, /*!< in: number of bytes to read */ ulint offset, /*!< in: least significant 32 bits of file offset from where to read */ - ulint offset_high) /*!< in: most significant 32 bits of + ulint offset_high, /*!< in: most significant 32 bits of offset */ + trx_t* trx) /*!< in: transaction whose io_reads, io_read and io_reads_wait_timer are updated when it takes stats; may be NULL */ { off_t offs; ssize_t n_bytes; + ulint sec; + ulint ms; /* sub-second part from ut_usectime(); added to sec * 1000000 below */ + ib_uint64_t start_time; + ib_uint64_t finish_time; ut_a((offset & 0xFFFFFFFFUL) == offset); @@ -2080,6 +2089,15 @@ os_file_pread( os_n_file_reads++; + if (innobase_get_slow_log() && trx && trx->take_stats) + { + trx->io_reads++; + trx->io_read += n; + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) os_mutex_enter(os_file_count_mutex); os_file_n_pending_preads++; @@ -2093,6 +2111,13 @@ os_file_pread( os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + return(n_bytes); #else { @@ -2123,6 +2148,13 @@ os_file_pread( os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + return(ret); } #endif @@ -2247,7 +2279,7 @@ Requests a synchronous positioned read operation. 
@return TRUE if request was successful, FALSE if fail */ UNIV_INTERN ibool -os_file_read( +_os_file_read( /*=========*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ @@ -2255,7 +2287,8 @@ os_file_read( offset where to read */ ulint offset_high, /*!< in: most significant 32 bits of offset */ - ulint n) /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + trx_t* trx) { #ifdef __WIN__ BOOL ret; @@ -2319,7 +2352,7 @@ try_again: os_bytes_read_since_printout += n; try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); + ret = _os_file_pread(file, buf, n, offset, offset_high, trx); if ((ulint)ret == n) { @@ -3211,7 +3244,8 @@ os_aio_array_reserve_slot( offset */ ulint offset_high, /*!< in: most significant 32 bits of offset */ - ulint len) /*!< in: length of the block to read or write */ + ulint len, /*!< in: length of the block to read or write */ + trx_t* trx) { os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO @@ -3482,10 +3516,11 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2)/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx) { os_aio_array_t* array; os_aio_slot_t* slot; @@ -3524,8 +3559,8 @@ os_aio( wait in the Windows case. 
*/ if (type == OS_FILE_READ) { - return(os_file_read(file, buf, offset, - offset_high, n)); + return(_os_file_read(file, buf, offset, + offset_high, n, trx)); } ut_a(type == OS_FILE_WRITE); @@ -3558,8 +3593,13 @@ try_again: ut_error; } + if (trx && type == OS_FILE_READ) + { + trx->io_reads++; + trx->io_read += n; + } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, offset_high, n); + name, buf, offset, offset_high, n, trx); if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO diff --git a/row/row0mysql.c b/row/row0mysql.c index 25946399fb6..8d480191d03 100644 --- a/row/row0mysql.c +++ b/row/row0mysql.c @@ -1770,7 +1770,6 @@ row_create_table_for_mysql( const char* table_name; ulint table_name_len; ulint err; - ulint i; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); #ifdef UNIV_SYNC_DEBUG @@ -1805,15 +1804,6 @@ err_exit: goto err_exit; } - /* Check that no reserved column names are used. */ - for (i = 0; i < dict_table_get_n_user_cols(table); i++) { - if (dict_col_name_is_reserved( - dict_table_get_col_name(table, i))) { - - goto err_exit; - } - } - trx_start_if_not_started(trx); /* The table name is prefixed with the database name and a '/'. 
diff --git a/srv/srv0srv.c b/srv/srv0srv.c index 2cfc852dc5c..9a1a6998199 100644 --- a/srv/srv0srv.c +++ b/srv/srv0srv.c @@ -103,6 +103,9 @@ Created 10/8/1995 Heikki Tuuri #include "ha_prototypes.h" #include "trx0i_s.h" +/* prototypes for new functions added to ha_innodb.cc */ +ibool innobase_get_slow_log(); + /* This is set to TRUE if the MySQL user has set it in MySQL; currently affects only FOREIGN KEY definition parsing */ UNIV_INTERN ibool srv_lower_case_table_names = FALSE; @@ -162,7 +165,7 @@ UNIV_INTERN ibool srv_extra_undoslots = FALSE; UNIV_INTERN ibool srv_fast_recovery = FALSE; -UNIV_INTERN ibool srv_use_purge_thread = FALSE; +UNIV_INTERN ulint srv_use_purge_thread = 0; /* if TRUE, then we auto-extend the last data file */ UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; @@ -1154,6 +1157,10 @@ srv_conc_enter_innodb( ibool has_slept = FALSE; srv_conc_slot_t* slot = NULL; ulint i; + ib_uint64_t start_time = 0L; + ib_uint64_t finish_time = 0L; + ulint sec; + ulint ms; if (trx->mysql_thd != NULL && thd_is_replication_slave_thread(trx->mysql_thd)) { @@ -1230,6 +1237,7 @@ retry: switches. 
*/ if (SRV_THREAD_SLEEP_DELAY > 0) { os_thread_sleep(SRV_THREAD_SLEEP_DELAY); + trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; } trx->op_info = ""; @@ -1285,12 +1293,25 @@ retry: /* Go to wait for the event; when a thread leaves InnoDB it will release this thread */ + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } + trx->op_info = "waiting in InnoDB queue"; os_event_wait(slot->event); trx->op_info = ""; + if (innobase_get_slow_log() && trx->take_stats && start_time) { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); + } + os_fast_mutex_lock(&srv_conc_mutex); srv_conc_n_waiting_threads--; @@ -3130,6 +3151,7 @@ srv_purge_thread( ulint n_pages_purged_sum = 1; /* dummy */ ulint history_len; ulint sleep_ms= 10000; /* initial: 10 sec. */ + ibool can_be_last = FALSE; #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Purge thread starts, id %lu\n", @@ -3142,8 +3164,21 @@ srv_purge_thread( mutex_exit(&kernel_mutex); loop: - if (srv_fast_shutdown && srv_shutdown_state > 0) { - goto exit_func; + if (srv_shutdown_state > 0) { + if (srv_fast_shutdown) { + /* someone other should wait the end of the workers */ + goto exit_func; + } + + mutex_enter(&kernel_mutex); + if (srv_n_threads_active[SRV_PURGE_WORKER]) { + can_be_last = FALSE; + } else { + can_be_last = TRUE; + } + mutex_exit(&kernel_mutex); + + sleep_ms = 10; } os_thread_sleep( sleep_ms * 1000 ); @@ -3164,6 +3199,15 @@ loop: n_pages_purged_sum += n_pages_purged; } while (n_pages_purged); + if (srv_shutdown_state > 0 && can_be_last) { + /* the last trx_purge() is executed without workers */ + goto exit_func; + } + + if (n_pages_purged_sum) { + srv_active_wake_master_thread(); + } + if (n_pages_purged_sum == 0) sleep_ms *= 10; if (sleep_ms > 10000) @@ -3172,9 +3216,62 @@ loop: goto loop; exit_func: - /* We count the 
number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ + trx_purge_worker_wake(); /* probably unnecessary; done for safety only */ + /* wake master thread to flush the pages */ + srv_wake_master_thread(); + + mutex_enter(&kernel_mutex); + srv_n_threads_active[SRV_PURGE]--; + mutex_exit(&kernel_mutex); os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/************************************************************************* +A purge worker thread: waits to be woken, then runs one purge worker +batch per wake-up, until srv_shutdown_state becomes nonzero */ +UNIV_INTERN +os_thread_ret_t +srv_purge_worker_thread( +/*====================*/ + void* arg) /* in: pointer to the ulint index of this worker */ +{ + ulint worker_id; /* index for array; passed to trx_purge_worker() */ + + worker_id = *((ulint*)arg); + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Purge worker thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif + srv_table_reserve_slot(SRV_PURGE_WORKER); + mutex_enter(&kernel_mutex); + srv_n_threads_active[SRV_PURGE_WORKER]++; + mutex_exit(&kernel_mutex); + +loop: + /* purge worker threads only work while srv_shutdown_state == 0, */ + /* for safety and exactness. 
*/ + if (srv_shutdown_state > 0) { + goto exit_func; + } + + trx_purge_worker_wait(); + + /* re-check: shutdown may have started while this thread was waiting */ if (srv_shutdown_state > 0) { + goto exit_func; + } + + trx_purge_worker(worker_id); + + goto loop; + +exit_func: + mutex_enter(&kernel_mutex); + srv_n_threads_active[SRV_PURGE_WORKER]--; + mutex_exit(&kernel_mutex); os_thread_exit(NULL); OS_THREAD_DUMMY_RETURN; diff --git a/srv/srv0start.c b/srv/srv0start.c index e8a9dabdc41..6f29b22e811 100644 --- a/srv/srv0start.c +++ b/srv/srv0start.c @@ -141,9 +141,9 @@ static mutex_t ios_mutex; static ulint ios; /** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS + 5]; +static ulint n[SRV_MAX_N_IO_THREADS + 5 + 64]; /** io_handler_thread identifiers */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5 + 64]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. 
*/ @@ -1739,8 +1739,17 @@ innobase_start_or_create_for_mysql(void) + (1 + SRV_MAX_N_IO_THREADS)); if (srv_use_purge_thread) { + ulint i; + os_thread_create(&srv_purge_thread, NULL, thread_ids + (4 + SRV_MAX_N_IO_THREADS)); + + for (i = 0; i < srv_use_purge_thread - 1; i++) { + n[5 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */ + os_thread_create(&srv_purge_worker_thread, + n + (5 + i + SRV_MAX_N_IO_THREADS), + thread_ids + (5 + i + SRV_MAX_N_IO_THREADS)); + } } #ifdef UNIV_DEBUG /* buf_debug_prints = TRUE; */ diff --git a/sync/sync0arr.c b/sync/sync0arr.c index 0519e13dee0..0e30fd054a8 100644 --- a/sync/sync0arr.c +++ b/sync/sync0arr.c @@ -492,12 +492,12 @@ sync_array_cell_print( mutex = cell->old_wait_mutex; fprintf(file, - "Mutex at %p created file %s line %lu, lock var %lu\n" + "Mutex at %p '%s', lock var %lu\n" #ifdef UNIV_SYNC_DEBUG "Last time reserved in file %s line %lu, " #endif /* UNIV_SYNC_DEBUG */ "waiters flag %lu\n", - (void*) mutex, mutex->cfile_name, (ulong) mutex->cline, + (void*) mutex, mutex->cmutex_name, (ulong) mutex->lock_word, #ifdef UNIV_SYNC_DEBUG mutex->file_name, (ulong) mutex->line, @@ -513,9 +513,8 @@ sync_array_cell_print( rwlock = cell->old_wait_rw_lock; fprintf(file, - " RW-latch at %p created in file %s line %lu\n", - (void*) rwlock, rwlock->cfile_name, - (ulong) rwlock->cline); + " RW-latch at %p '%s'\n", + (void*) rwlock, rwlock->lock_name); writer = rw_lock_get_writer(rwlock); if (writer != RW_LOCK_NOT_LOCKED) { fprintf(file, diff --git a/sync/sync0rw.c b/sync/sync0rw.c index 0ed114e330c..b07bd21dcac 100644 --- a/sync/sync0rw.c +++ b/sync/sync0rw.c @@ -230,8 +230,8 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ #endif /* UNIV_DEBUG */ + const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ ulint cline) /*!< in: file line where created */ 
{ @@ -241,14 +241,15 @@ rw_lock_create_func( #ifndef INNODB_RW_LOCKS_USE_ATOMICS mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); - lock->mutex.cfile_name = cfile_name; - lock->mutex.cline = cline; + ut_d(lock->mutex.cfile_name = cfile_name); + ut_d(lock->mutex.cline = cline); - ut_d(lock->mutex.cmutex_name = cmutex_name); + lock->mutex.cmutex_name = cmutex_name; ut_d(lock->mutex.mutex_type = 1); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ # ifdef UNIV_DEBUG - UT_NOT_USED(cmutex_name); + UT_NOT_USED(cfile_name); + UT_NOT_USED(cline); # endif #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -268,8 +269,7 @@ rw_lock_create_func( lock->magic_n = RW_LOCK_MAGIC_N; - lock->cfile_name = cfile_name; - lock->cline = (unsigned int) cline; + lock->lock_name = cmutex_name; lock->count_os_wait = 0; lock->last_s_file_name = "not yet reserved"; @@ -304,8 +304,6 @@ rw_lock_free( ut_ad(rw_lock_validate(lock)); ut_a(lock->lock_word == X_LOCK_DECR); - lock->magic_n = 0; - #ifndef INNODB_RW_LOCKS_USE_ATOMICS mutex_free(rw_lock_get_mutex(lock)); #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -325,6 +323,8 @@ rw_lock_free( UT_LIST_REMOVE(list, rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); + + lock->magic_n = 0; } #ifdef UNIV_DEBUG @@ -390,10 +390,10 @@ lock_loop: if (srv_print_latch_waits) { fprintf(stderr, "Thread %lu spin wait rw-s-lock at %p" - " cfile %s cline %lu rnds %lu\n", + " '%s' rnds %lu\n", (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) lock, - lock->cfile_name, (ulong) lock->cline, (ulong) i); + lock->lock_name, (ulong) i); } /* We try once again to obtain the lock */ @@ -426,10 +426,9 @@ lock_loop: if (srv_print_latch_waits) { fprintf(stderr, "Thread %lu OS wait rw-s-lock at %p" - " cfile %s cline %lu\n", + " '%s'\n", os_thread_pf(os_thread_get_curr_id()), - (void*) lock, lock->cfile_name, - (ulong) lock->cline); + (void*) lock, lock->lock_name); } /* these stats may not be accurate */ @@ -648,9 +647,9 @@ lock_loop: if (srv_print_latch_waits) { 
fprintf(stderr, "Thread %lu spin wait rw-x-lock at %p" - " cfile %s cline %lu rnds %lu\n", + " '%s' rnds %lu\n", os_thread_pf(os_thread_get_curr_id()), (void*) lock, - lock->cfile_name, (ulong) lock->cline, (ulong) i); + lock->lock_name, (ulong) i); } sync_array_reserve_cell(sync_primary_wait_array, @@ -671,9 +670,9 @@ lock_loop: if (srv_print_latch_waits) { fprintf(stderr, "Thread %lu OS wait for rw-x-lock at %p" - " cfile %s cline %lu\n", + " '%s'\n", os_thread_pf(os_thread_get_curr_id()), (void*) lock, - lock->cfile_name, (ulong) lock->cline); + lock->lock_name); } /* these stats may not be accurate */ diff --git a/sync/sync0sync.c b/sync/sync0sync.c index 01aa49688b7..9302f6bebf2 100644 --- a/sync/sync0sync.c +++ b/sync/sync0sync.c @@ -237,8 +237,8 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG const char* cmutex_name, /*!< in: mutex name */ +#ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ @@ -262,11 +262,13 @@ mutex_create_func( mutex->file_name = "not yet reserved"; mutex->level = level; #endif /* UNIV_SYNC_DEBUG */ +#ifdef UNIV_DEBUG mutex->cfile_name = cfile_name; mutex->cline = cline; +#endif /* UNIV_DEBUG */ mutex->count_os_wait = 0; -#ifdef UNIV_DEBUG mutex->cmutex_name= cmutex_name; +#ifdef UNIV_DEBUG mutex->count_using= 0; mutex->mutex_type= 0; mutex->lspent_time= 0; @@ -498,9 +500,9 @@ spin_loop: #ifdef UNIV_SRV_PRINT_LATCH_WAITS fprintf(stderr, "Thread %lu spin wait mutex at %p" - " cfile %s cline %lu rnds %lu\n", + " '%s' rnds %lu\n", (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex, - mutex->cfile_name, (ulong) mutex->cline, (ulong) i); + mutex->cmutex_name, (ulong) i); #endif mutex_spin_round_count += i; @@ -575,9 +577,9 @@ spin_loop: #ifdef UNIV_SRV_PRINT_LATCH_WAITS fprintf(stderr, - "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n", + "Thread %lu OS wait mutex at %p '%s' rnds %lu\n", (ulong) 
os_thread_pf(os_thread_get_curr_id()), (void*) mutex, - mutex->cfile_name, (ulong) mutex->cline, (ulong) i); + mutex->cmutex_name, (ulong) i); #endif mutex_os_wait_count++; @@ -873,9 +875,8 @@ sync_thread_levels_g( if (mutex->magic_n == MUTEX_MAGIC_N) { fprintf(stderr, - "Mutex created at %s %lu\n", - mutex->cfile_name, - (ulong) mutex->cline); + "Mutex '%s'\n", + mutex->cmutex_name); if (mutex_get_lock_word(mutex) != 0) { const char* file_name; diff --git a/trx/trx0purge.c b/trx/trx0purge.c index cd79fd1c315..62fbd1c1446 100644 --- a/trx/trx0purge.c +++ b/trx/trx0purge.c @@ -184,8 +184,9 @@ this query graph. @return own: the query graph */ static que_t* -trx_purge_graph_build(void) +trx_purge_graph_build( /*=======================*/ + trx_t* trx) { mem_heap_t* heap; que_fork_t* fork; @@ -194,7 +195,7 @@ trx_purge_graph_build(void) heap = mem_heap_create(512); fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap); - fork->trx = purge_sys->trx; + fork->trx = trx; thr = que_thr_create(fork, heap); @@ -243,10 +244,35 @@ trx_purge_sys_create(void) ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED)); - purge_sys->query = trx_purge_graph_build(); + purge_sys->query = trx_purge_graph_build(purge_sys->trx); purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero, purge_sys->heap); + + purge_sys->n_worker = 0; + if (srv_use_purge_thread > 1) { + /* Use worker threads */ + ulint i; + + purge_sys->n_worker = srv_use_purge_thread - 1; + + purge_sys->sess_arr = mem_alloc(sizeof(sess_t*) * purge_sys->n_worker); + purge_sys->trx_arr = mem_alloc(sizeof(trx_t*) * purge_sys->n_worker); + purge_sys->query_arr = mem_alloc(sizeof(que_t*) * purge_sys->n_worker); + + purge_sys->worker_event = os_event_create(NULL); + os_event_reset(purge_sys->worker_event); + + for (i = 0; i < purge_sys->n_worker; i++) { + purge_sys->sess_arr[i] = sess_open(); + + purge_sys->trx_arr[i] = purge_sys->sess_arr[i]->trx; + purge_sys->trx_arr[i]->is_purge = 1; + 
ut_a(trx_start_low(purge_sys->trx_arr[i], ULINT_UNDEFINED)); + + purge_sys->query_arr[i] = trx_purge_graph_build(purge_sys->trx_arr[i]); + } + } } /*================ UNDO LOG HISTORY LIST =============================*/ @@ -1110,7 +1136,7 @@ trx_purge(void) /* Handle at most 20 undo log pages in one purge batch */ - purge_sys->handle_limit = purge_sys->n_pages_handled + 20; + purge_sys->handle_limit = purge_sys->n_pages_handled + 20 * (srv_use_purge_thread + 1); old_pages_handled = purge_sys->n_pages_handled; @@ -1129,6 +1155,9 @@ trx_purge(void) mutex_exit(&kernel_mutex); + if (purge_sys->n_worker) + os_event_set(purge_sys->worker_event); + /* srv_que_task_enqueue(thr2); */ if (srv_print_thread_releases) { @@ -1138,6 +1167,9 @@ trx_purge(void) que_run_threads(thr); + if (purge_sys->n_worker) + os_event_reset(purge_sys->worker_event); + if (srv_print_thread_releases) { fprintf(stderr, @@ -1148,6 +1180,52 @@ trx_purge(void) return(purge_sys->n_pages_handled - old_pages_handled); } +/********************************************************************** +This function runs one purge worker batch: it starts the query graph purge_sys->query_arr[worker_id] while holding kernel_mutex, runs it with que_run_threads(), and afterwards resets purge_sys->worker_event if purge_sys->state is TRX_STOP_PURGE */ +UNIV_INTERN +void +trx_purge_worker( +/*=============*/ + ulint worker_id) /*!< in: index of this worker's query graph in purge_sys->query_arr */ +{ + que_thr_t* thr; + + mutex_enter(&kernel_mutex); + + thr = que_fork_start_command(purge_sys->query_arr[worker_id]); + + ut_ad(thr); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + if (purge_sys->state == TRX_STOP_PURGE) { /* optimistic: no mutex is taken in this function around this read of purge_sys->state */ + os_event_reset(purge_sys->worker_event); + } +} + +/********************************************************************** +This function waits for the worker batch event: it calls os_event_wait() on purge_sys->worker_event, which trx_purge() and trx_purge_worker_wake() set */ +UNIV_INTERN +void +trx_purge_worker_wait(void) +/*=======================*/ +{ + os_event_wait(purge_sys->worker_event); +} + +/********************************************************************** +This function wakes the waiting worker batch by setting purge_sys->worker_event; it does nothing when purge_sys->n_worker is zero */ +UNIV_INTERN +void +trx_purge_worker_wake(void) +/*=======================*/ +{ + if (purge_sys->n_worker) +
os_event_set(purge_sys->worker_event); +} + /******************************************************************//** Prints information of the purge system to stderr. */ UNIV_INTERN diff --git a/trx/trx0trx.c b/trx/trx0trx.c index 12253c131a8..b7fedf58c2a 100644 --- a/trx/trx0trx.c +++ b/trx/trx0trx.c @@ -178,6 +178,15 @@ trx_create( trx->global_read_view = NULL; trx->read_view = NULL; + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + trx->distinct_page_access_hash = NULL; + trx->take_stats = FALSE; + /* Set X/Open XA transaction identification to NULL */ memset(&trx->xid, 0, sizeof(trx->xid)); trx->xid.formatID = -1; @@ -215,6 +224,11 @@ trx_allocate_for_mysql(void) trx->mysql_process_no = os_proc_get_number(); + if (innobase_get_slow_log() && trx->take_stats) { + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); + } + return(trx); } @@ -346,6 +360,12 @@ trx_free_for_mysql( /*===============*/ trx_t* trx) /*!< in, own: trx object */ { + if (trx->distinct_page_access_hash) + { + mem_free(trx->distinct_page_access_hash); + trx->distinct_page_access_hash= NULL; + } + mutex_enter(&kernel_mutex); UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); @@ -367,6 +387,12 @@ trx_free_for_background( /*====================*/ trx_t* trx) /*!< in, own: trx object */ { + if (trx->distinct_page_access_hash) + { + mem_free(trx->distinct_page_access_hash); + trx->distinct_page_access_hash= NULL; + } + mutex_enter(&kernel_mutex); trx_free(trx); @@ -1072,6 +1098,9 @@ trx_end_lock_wait( trx_t* trx) /*!< in: transaction */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_uint64_t now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1086,6 +1115,11 @@ trx_end_lock_wait( thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + if (innobase_get_slow_log() && trx->take_stats) { + 
ut_usectime(&sec, &ms); + now = (ib_uint64_t)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); + } trx->que_state = TRX_QUE_RUNNING; } @@ -1099,6 +1133,9 @@ trx_lock_wait_to_suspended( trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_uint64_t now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1113,6 +1150,11 @@ trx_lock_wait_to_suspended( thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + now = (ib_uint64_t)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); + } trx->que_state = TRX_QUE_RUNNING; }