diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 6af3dff460a..32435bd1f87 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -1489,7 +1489,7 @@ buf_pool_resize(void) /********************************************************************//** Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from from slipping out of +function can be used to prevent an important page from slipping out of the buffer pool. */ UNIV_INTERN void @@ -1506,6 +1506,36 @@ buf_page_make_young( buf_pool_mutex_exit(); } +/********************************************************************//** +Sets the time of the first access of a page and moves a page to the +start of the buffer pool LRU list if it is too old. This high-level +function can be used to prevent an important page from slipping +out of the buffer pool. */ +static +void +buf_page_set_accessed_make_young( +/*=============================*/ + buf_page_t* bpage, /*!< in/out: buffer block of a + file page */ + unsigned access_time) /*!< in: bpage->access_time + read under mutex protection, + or 0 if unknown */ +{ + ut_ad(!buf_pool_mutex_own()); + ut_a(buf_page_in_file(bpage)); + + if (buf_page_peek_if_too_old(bpage)) { + buf_pool_mutex_enter(); + buf_LRU_make_block_young(bpage); + buf_pool_mutex_exit(); + } else if (!access_time) { + ulint time_ms = ut_time_ms(); + buf_pool_mutex_enter(); + buf_page_set_accessed(bpage, time_ms); + buf_pool_mutex_exit(); + } +} + /********************************************************************//** Resets the check_index_page_at_flush field of a page if found in the buffer pool. */ @@ -1637,6 +1667,7 @@ buf_page_get_zip( buf_page_t* bpage; mutex_t* block_mutex; ibool must_read; + unsigned access_time; #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside()); @@ -1704,17 +1735,14 @@ err_exit: got_block: must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; - - if (buf_page_peek_if_too_old(bpage)) { - buf_LRU_make_block_young(bpage); - } - - buf_page_set_accessed(bpage); + access_time = buf_page_is_accessed(bpage); buf_pool_mutex_exit(); mutex_exit(block_mutex); + buf_page_set_accessed_make_young(bpage, access_time); + #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(!bpage->file_page_was_freed); #endif @@ -2244,14 +2272,10 @@ wait_until_unfixed: access_time = buf_page_is_accessed(&block->page); - if (buf_page_peek_if_too_old(&block->page)) { - buf_LRU_make_block_young(&block->page); - } - - buf_page_set_accessed(&block->page); - buf_pool_mutex_exit(); + buf_page_set_accessed_make_young(&block->page, access_time); + #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(!block->page.file_page_was_freed); #endif @@ -2353,18 +2377,13 @@ buf_page_optimistic_get_func( mutex_exit(&block->mutex); - buf_pool_mutex_enter(); + /* Check if this is the first access to the page. + We do a dirty read on purpose, to avoid mutex contention. + This field is only used for heuristic purposes; it does not + affect correctness. */ - /* Check if this is the first access to the page */ access_time = buf_page_is_accessed(&block->page); - - if (buf_page_peek_if_too_old(&block->page)) { - buf_LRU_make_block_young(&block->page); - } - - buf_page_set_accessed(&block->page); - - buf_pool_mutex_exit(); + buf_page_set_accessed_make_young(&block->page, access_time); ut_ad(!ibuf_inside() || ibuf_page(buf_block_get_space(block), @@ -2477,16 +2496,22 @@ buf_page_get_known_nowait( mutex_exit(&block->mutex); - buf_pool_mutex_enter(); - if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) { + buf_pool_mutex_enter(); buf_LRU_make_block_young(&block->page); + buf_pool_mutex_exit(); + } else if (!buf_page_is_accessed(&block->page)) { + /* Above, we do a dirty read on purpose, to avoid + mutex contention. The field buf_page_t::access_time + is only used for heuristic purposes. Writes to the + field must be protected by mutex, however. */ + ulint time_ms = ut_time_ms(); + + buf_pool_mutex_enter(); + buf_page_set_accessed(&block->page, time_ms); + buf_pool_mutex_exit(); } - buf_page_set_accessed(&block->page); - - buf_pool_mutex_exit(); - ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { @@ -2917,6 +2942,7 @@ buf_page_create( buf_frame_t* frame; buf_block_t* block; buf_block_t* free_block = NULL; + ulint time_ms = ut_time_ms(); ut_ad(mtr); ut_ad(space || !zip_size); @@ -3000,7 +3026,7 @@ buf_page_create( rw_lock_x_unlock(&block->lock); } - buf_page_set_accessed(&block->page); + buf_page_set_accessed(&block->page, time_ms); buf_pool_mutex_exit(); diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c index e7cf852e200..dd632ace4ac 100644 --- a/storage/innodb_plugin/buf/buf0lru.c +++ b/storage/innodb_plugin/buf/buf0lru.c @@ -1866,7 +1866,9 @@ buf_LRU_old_ratio_update( buf_LRU_old_ratio = ratio; } - return(ratio * 100 / BUF_LRU_OLD_RATIO_DIV); + /* the reverse of + ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */ + return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5)); } /********************************************************************//** diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index d279f0ba4f3..1d72d09727e 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -868,17 +868,14 @@ convert_error_code_to_mysql( return(ER_PRIMARY_CANT_HAVE_NULL); case DB_TOO_MANY_CONCURRENT_TRXS: - /* Once MySQL add the appropriate code to errmsg.txt then - we can get rid of this #ifdef. NOTE: The code checked by - the #ifdef is the suggested name for the error condition - and the actual error code name could very well be different. - This will require some monitoring, ie. the status - of this request on our part.*/ -#ifdef ER_TOO_MANY_CONCURRENT_TRXS - return(ER_TOO_MANY_CONCURRENT_TRXS); -#else + /* New error code HA_ERR_TOO_MANY_CONCURRENT_TRXS is only + available in 5.1.38 and later, but the plugin should still + work with previous versions of MySQL. */ +#ifdef HA_ERR_TOO_MANY_CONCURRENT_TRXS + return(HA_ERR_TOO_MANY_CONCURRENT_TRXS); +#else /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */ return(HA_ERR_RECORD_FILE_FULL); -#endif +#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */ case DB_UNSUPPORTED: return(HA_ERR_UNSUPPORTED); } diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index db95aa7ce21..e9d95a14f1b 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -346,7 +346,7 @@ buf_page_release( mtr_t* mtr); /*!< in: mtr */ /********************************************************************//** Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from from slipping out of +function can be used to prevent an important page from slipping out of the buffer pool. */ UNIV_INTERN void @@ -821,7 +821,8 @@ UNIV_INLINE void buf_page_set_accessed( /*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ + buf_page_t* bpage, /*!< in/out: control block */ + ulint time_ms) /*!< in: ut_time_ms() */ __attribute__((nonnull)); /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic index 18ee4551eb4..8b1f904a090 100644 --- a/storage/innodb_plugin/include/buf0buf.ic +++ b/storage/innodb_plugin/include/buf0buf.ic @@ -72,10 +72,16 @@ buf_page_peek_if_too_old( /*=====================*/ const buf_page_t* bpage) /*!< in: block to make younger */ { - if (buf_LRU_old_threshold_ms && bpage->old) { + if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) { + /* If eviction has not started yet, do not update the + statistics or move blocks in the LRU list. This is + either the warm-up phase or an in-memory workload. */ + return(FALSE); + } else if (buf_LRU_old_threshold_ms && bpage->old) { unsigned access_time = buf_page_is_accessed(bpage); - if (access_time && ut_time_ms() - access_time + if (access_time > 0 + && (ut_time_ms() - access_time) >= buf_LRU_old_threshold_ms) { return(TRUE); } @@ -85,10 +91,10 @@ buf_page_peek_if_too_old( } else { /* FIXME: bpage->freed_page_clock is 31 bits */ return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) - > bpage->freed_page_clock - + (buf_pool->curr_size - * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio) - / (BUF_LRU_OLD_RATIO_DIV * 4))); + > ((ulint) bpage->freed_page_clock + + (buf_pool->curr_size + * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio) + / (BUF_LRU_OLD_RATIO_DIV * 4)))); } } @@ -490,14 +496,15 @@ UNIV_INLINE void buf_page_set_accessed( /*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ + buf_page_t* bpage, /*!< in/out: control block */ + ulint time_ms) /*!< in: ut_time_ms() */ { ut_a(buf_page_in_file(bpage)); ut_ad(buf_pool_mutex_own()); if (!bpage->access_time) { /* Make this the time of the first access. */ - bpage->access_time = ut_time_ms(); + bpage->access_time = time_ms; } } diff --git a/storage/innodb_plugin/include/trx0roll.h b/storage/innodb_plugin/include/trx0roll.h index 37205dcd4b3..1dee5655c8c 100644 --- a/storage/innodb_plugin/include/trx0roll.h +++ b/storage/innodb_plugin/include/trx0roll.h @@ -133,6 +133,17 @@ trx_rollback( Rollback or clean up any incomplete transactions which were encountered in crash recovery. If the transaction already was committed, then we clean up a possible insert undo log. If the +transaction was not yet committed, then we roll it back. */ +UNIV_INTERN +void +trx_rollback_or_clean_recovered( +/*============================*/ + ibool all); /*!< in: FALSE=roll back dictionary transactions; + TRUE=roll back all non-PREPARED transactions */ +/*******************************************************************//** +Rollback or clean up any incomplete transactions which were +encountered in crash recovery. If the transaction already was +committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. Note: this is done in a background thread. @return a dummy parameter */ diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h index 4b79732b4bf..d2a59740c93 100644 --- a/storage/innodb_plugin/include/trx0trx.h +++ b/storage/innodb_plugin/include/trx0trx.h @@ -179,7 +179,7 @@ trx_commit_off_kernel( /****************************************************************//** Cleans up a transaction at database startup. The cleanup is needed if the transaction already got to the middle of a commit when the database -crashed, andf we cannot roll it back. */ +crashed, and we cannot roll it back. */ UNIV_INTERN void trx_cleanup_at_db_startup( diff --git a/storage/innodb_plugin/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c index 971c5600d39..e2d85b2f1bb 100644 --- a/storage/innodb_plugin/log/log0recv.c +++ b/storage/innodb_plugin/log/log0recv.c @@ -3118,6 +3118,11 @@ recv_recovery_from_checkpoint_finish(void) #ifndef UNIV_LOG_DEBUG recv_sys_free(); #endif + /* Roll back any recovered data dictionary transactions, so + that the data dictionary tables will be free of any locks. + The data dictionary latch should guarantee that there is at + most one data dictionary transaction active at a time. */ + trx_rollback_or_clean_recovered(FALSE); /* Drop partially created indexes. */ row_merge_drop_temp_indexes(); diff --git a/storage/innodb_plugin/plug.in b/storage/innodb_plugin/plug.in index 230c6ed34df..195800462aa 100644 --- a/storage/innodb_plugin/plug.in +++ b/storage/innodb_plugin/plug.in @@ -112,7 +112,7 @@ MYSQL_PLUGIN_ACTIONS(innodb_plugin, [ [ AC_MSG_RESULT(no) ] - ) + ) AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins) AC_TRY_RUN( @@ -142,7 +142,7 @@ MYSQL_PLUGIN_ACTIONS(innodb_plugin, [ [ AC_MSG_RESULT(no) ] - ) + ) # Try using solaris atomics on SunOS if GCC atomics are not available AC_CHECK_DECLS( diff --git a/storage/innodb_plugin/trx/trx0roll.c b/storage/innodb_plugin/trx/trx0roll.c index b139a8a19af..c925478cdf4 100644 --- a/storage/innodb_plugin/trx/trx0roll.c +++ b/storage/innodb_plugin/trx/trx0roll.c @@ -532,28 +532,26 @@ trx_rollback_active( Rollback or clean up any incomplete transactions which were encountered in crash recovery. If the transaction already was committed, then we clean up a possible insert undo log. If the -transaction was not yet committed, then we roll it back. -Note: this is done in a background thread. -@return a dummy parameter */ +transaction was not yet committed, then we roll it back. */ UNIV_INTERN -os_thread_ret_t -trx_rollback_or_clean_all_recovered( -/*================================*/ - void* arg __attribute__((unused))) - /*!< in: a dummy parameter required by - os_thread_create */ +void +trx_rollback_or_clean_recovered( +/*============================*/ + ibool all) /*!< in: FALSE=roll back dictionary transactions; + TRUE=roll back all non-PREPARED transactions */ { trx_t* trx; mutex_enter(&kernel_mutex); - if (UT_LIST_GET_FIRST(trx_sys->trx_list)) { + if (!UT_LIST_GET_FIRST(trx_sys->trx_list)) { + goto leave_function; + } + if (all) { fprintf(stderr, "InnoDB: Starting in background the rollback" " of uncommitted transactions\n"); - } else { - goto leave_function; } mutex_exit(&kernel_mutex); @@ -582,18 +580,42 @@ loop: goto loop; case TRX_ACTIVE: - mutex_exit(&kernel_mutex); - trx_rollback_active(trx); - goto loop; + if (all || trx_get_dict_operation(trx) + != TRX_DICT_OP_NONE) { + mutex_exit(&kernel_mutex); + trx_rollback_active(trx); + goto loop; + } } } - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Rollback of non-prepared transactions completed\n"); + if (all) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Rollback of non-prepared" + " transactions completed\n"); + } leave_function: mutex_exit(&kernel_mutex); +} + +/*******************************************************************//** +Rollback or clean up any incomplete transactions which were +encountered in crash recovery. If the transaction already was +committed, then we clean up a possible insert undo log. If the +transaction was not yet committed, then we roll it back. +Note: this is done in a background thread. +@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +trx_rollback_or_clean_all_recovered( +/*================================*/ + void* arg __attribute__((unused))) + /*!< in: a dummy parameter required by + os_thread_create */ +{ + trx_rollback_or_clean_recovered(TRUE); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c index 4d4885062a6..1e36a2e4fe7 100644 --- a/storage/innodb_plugin/trx/trx0trx.c +++ b/storage/innodb_plugin/trx/trx0trx.c @@ -950,7 +950,7 @@ trx_commit_off_kernel( /****************************************************************//** Cleans up a transaction at database startup. The cleanup is needed if the transaction already got to the middle of a commit when the database -crashed, andf we cannot roll it back. */ +crashed, and we cannot roll it back. */ UNIV_INTERN void trx_cleanup_at_db_startup( diff --git a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c b/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c index a18a537d1d4..310603c7503 100644 --- a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c +++ b/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c @@ -17,18 +17,38 @@ Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ /***************************************************************************** -If this program compiles, then pthread_t objects can be used as arguments -to Solaris libc atomic functions. +If this program compiles and returns 0, then pthread_t objects can be used as +arguments to Solaris libc atomic functions. Created April 18, 2009 Vasil Dimov *****************************************************************************/ #include +#include int main(int argc, char** argv) { - pthread_t x = 0; + pthread_t x1; + pthread_t x2; + pthread_t x3; + + memset(&x1, 0x0, sizeof(x1)); + memset(&x2, 0x0, sizeof(x2)); + memset(&x3, 0x0, sizeof(x3)); + + if (sizeof(pthread_t) == 4) { + + atomic_cas_32(&x1, x2, x3); + + } else if (sizeof(pthread_t) == 8) { + + atomic_cas_64(&x1, x2, x3); + + } else { + + return(1); + } return(0); }