diff --git a/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result b/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result new file mode 100644 index 00000000000..d627aa92ba7 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result @@ -0,0 +1,92 @@ +SET @start_global_value = @@global.innodb_random_read_ahead; +SELECT @start_global_value; +@start_global_value +0 +Valid values are 'ON' and 'OFF' +select @@global.innodb_random_read_ahead in (0, 1); +@@global.innodb_random_read_ahead in (0, 1) +1 +select @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +0 +select @@session.innodb_random_read_ahead; +ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable +show global variables like 'innodb_random_read_ahead'; +Variable_name Value +innodb_random_read_ahead OFF +show session variables like 'innodb_random_read_ahead'; +Variable_name Value +innodb_random_read_ahead OFF +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD OFF +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD OFF +set global innodb_random_read_ahead='ON'; +select @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +1 +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD ON +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD ON +set @@global.innodb_random_read_ahead=0; +select @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +0 +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD OFF +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD OFF +set global innodb_random_read_ahead=1; +select @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +1 +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD ON +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD ON +set @@global.innodb_random_read_ahead='OFF'; +select @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +0 +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD OFF +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD OFF +set session innodb_random_read_ahead='OFF'; +ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable and should be set with SET GLOBAL +set @@session.innodb_random_read_ahead='ON'; +ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_random_read_ahead=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_random_read_ahead' +set global innodb_random_read_ahead=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_random_read_ahead' +set global innodb_random_read_ahead=2; +ERROR 42000: Variable 'innodb_random_read_ahead' can't be set to the value of '2' +NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643) +set global innodb_random_read_ahead=-3; +select @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +1 +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD ON +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_RANDOM_READ_AHEAD ON +set global innodb_random_read_ahead='AUTO'; +ERROR 42000: Variable 'innodb_random_read_ahead' can't be set to the value of 'AUTO' +SET @@global.innodb_random_read_ahead = @start_global_value; +SELECT @@global.innodb_random_read_ahead; +@@global.innodb_random_read_ahead +0 diff --git a/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test b/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test new file mode 100644 index 00000000000..f78223bad02 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test @@ -0,0 +1,70 @@ + + +# 2010-01-25 - Added +# + +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_random_read_ahead; +SELECT @start_global_value; + +# +# exists as global only +# +--echo Valid values are 'ON' and 'OFF' +select @@global.innodb_random_read_ahead in (0, 1); +select @@global.innodb_random_read_ahead; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_random_read_ahead; +show global variables like 'innodb_random_read_ahead'; +show session variables like 'innodb_random_read_ahead'; +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; + +# +# show that it's writable +# +set global innodb_random_read_ahead='ON'; +select @@global.innodb_random_read_ahead; +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +set @@global.innodb_random_read_ahead=0; +select @@global.innodb_random_read_ahead; +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +set global innodb_random_read_ahead=1; +select @@global.innodb_random_read_ahead; +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +set @@global.innodb_random_read_ahead='OFF'; +select @@global.innodb_random_read_ahead; +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--error ER_GLOBAL_VARIABLE +set session innodb_random_read_ahead='OFF'; +--error ER_GLOBAL_VARIABLE +set @@session.innodb_random_read_ahead='ON'; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_random_read_ahead=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_random_read_ahead=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_random_read_ahead=2; +--echo NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643) +set global innodb_random_read_ahead=-3; +select @@global.innodb_random_read_ahead; +select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; +select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_random_read_ahead='AUTO'; + +# +# Cleanup +# + +SET @@global.innodb_random_read_ahead = @start_global_value; +SELECT @@global.innodb_random_read_ahead; diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index fe311ebfd8d..648e53ac4a6 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -400,6 +400,7 @@ buf_get_total_stat( tot_stat->n_pages_read += buf_stat->n_pages_read; tot_stat->n_pages_written += buf_stat->n_pages_written; tot_stat->n_pages_created += buf_stat->n_pages_created; + tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd; tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read; tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted; tot_stat->n_pages_made_young += buf_stat->n_pages_made_young; @@ -2358,6 +2359,9 @@ loop2: } if (buf_read_page(space, zip_size, offset)) { + buf_read_ahead_random(space, zip_size, offset, + ibuf_inside(mtr)); + retries = 0; } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { ++retries; @@ -4381,6 +4385,7 @@ buf_stats_aggregate_pool_info( total_info->n_pages_created += pool_info->n_pages_created; total_info->n_pages_written += pool_info->n_pages_written; total_info->n_page_gets += pool_info->n_page_gets; + total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd; total_info->n_ra_pages_read += pool_info->n_ra_pages_read; total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted; total_info->page_made_young_rate += pool_info->page_made_young_rate; @@ -4393,6 +4398,7 @@ buf_stats_aggregate_pool_info( total_info->page_read_delta += pool_info->page_read_delta; total_info->young_making_delta += pool_info->young_making_delta; total_info->not_young_making_delta += pool_info->not_young_making_delta; + total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate; total_info->pages_readahead_rate += pool_info->pages_readahead_rate; total_info->pages_evicted_rate += pool_info->pages_evicted_rate; total_info->unzip_lru_len += pool_info->unzip_lru_len; @@ -4470,6 +4476,7 @@ buf_stats_get_pool_info( pool_info->n_page_gets = buf_pool->stat.n_page_gets; + pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd; pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read; pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted; @@ -4509,6 +4516,10 @@ buf_stats_get_pool_info( buf_pool->stat.n_pages_not_made_young - buf_pool->old_stat.n_pages_not_made_young; } + pool_info->pages_readahead_rnd_rate = + (buf_pool->stat.n_ra_pages_read_rnd + - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed; + pool_info->pages_readahead_rate = (buf_pool->stat.n_ra_pages_read @@ -4594,9 +4605,12 @@ buf_print_io_instance( /* Statistics about read ahead algorithm */ fprintf(file, "Pages read ahead %.2f/s," - " evicted without access %.2f/s\n", + " evicted without access %.2f/s," + " Random read ahead %.2f/s\n", + pool_info->pages_readahead_rate, - pool_info->pages_evicted_rate); + pool_info->pages_evicted_rate, + pool_info->pages_readahead_rnd_rate); /* Print some values to help us with visualizing what is happening with LRU eviction. */ diff --git a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c index eeaa21ae9ef..da804a66b29 100644 --- a/storage/innobase/buf/buf0rea.c +++ b/storage/innobase/buf/buf0rea.c @@ -40,8 +40,10 @@ Created 11/5/1995 Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" -/** The linear read-ahead area size */ -#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA +/** There must be at least this many pages in buf_pool in the area to start +a random read-ahead */ +#define BUF_READ_AHEAD_RANDOM_THRESHOLD(b) \ + (5 + BUF_READ_AHEAD_AREA(b) / 8) /** If there are buf_pool->curr_size per the number below pending reads, then read-ahead is not done: this is to prevent flooding the buffer pool with @@ -161,6 +163,171 @@ buf_read_page_low( return(1); } +/********************************************************************//** +Applies a random read-ahead in buf_pool if there are at least a threshold +value of accessed pages from the random read-ahead area. Does not read any +page, not even the one at the position (space, offset), if the read-ahead +mechanism is not activated. NOTE 1: the calling thread may own latches on +pages: to avoid deadlocks this function must be written such that it cannot +end up waiting for these latches! NOTE 2: the calling thread must want +access to the page given: this rule is set to prevent unintended read-aheads +performed by ibuf routines, a situation which could result in a deadlock if +the OS does not support asynchronous i/o. +@return number of page read requests issued; NOTE that if we read ibuf +pages, it may happen that the page at the given page number does not +get read even if we return a positive value! +@return number of page read requests issued */ +UNIV_INTERN +ulint +buf_read_ahead_random( +/*==================*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes, + or 0 */ + ulint offset, /*!< in: page number of a page which + the current thread wants to access */ + ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf + routine */ +{ + buf_pool_t* buf_pool = buf_pool_get(space, offset); + ib_int64_t tablespace_version; + ulint recent_blocks = 0; + ulint ibuf_mode; + ulint count; + ulint low, high; + ulint err; + ulint i; + const ulint buf_read_ahead_random_area + = BUF_READ_AHEAD_AREA(buf_pool); + + if (!srv_random_read_ahead) { + /* Disabled by user */ + return(0); + } + + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); + } + + if (ibuf_bitmap_page(zip_size, offset) + || trx_sys_hdr_page(space, offset)) { + + /* If it is an ibuf bitmap page or trx sys hdr, we do + no read-ahead, as that could break the ibuf page access + order */ + + return(0); + } + + /* Remember the tablespace version before we ask te tablespace size + below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we + do not try to read outside the bounds of the tablespace! */ + + tablespace_version = fil_space_get_version(space); + + low = (offset / buf_read_ahead_random_area) + * buf_read_ahead_random_area; + high = (offset / buf_read_ahead_random_area + 1) + * buf_read_ahead_random_area; + if (high > fil_space_get_size(space)) { + + high = fil_space_get_size(space); + } + + buf_pool_mutex_enter(buf_pool); + + if (buf_pool->n_pend_reads + > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { + buf_pool_mutex_exit(buf_pool); + + return(0); + } + + /* Count how many blocks in the area have been recently accessed, + that is, reside near the start of the LRU list. */ + + for (i = low; i < high; i++) { + const buf_page_t* bpage = + buf_page_hash_get(buf_pool, space, i); + + if (bpage + && buf_page_is_accessed(bpage) + && buf_page_peek_if_young(bpage)) { + + recent_blocks++; + + if (recent_blocks + >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) { + + buf_pool_mutex_exit(buf_pool); + goto read_ahead; + } + } + } + + buf_pool_mutex_exit(buf_pool); + /* Do nothing */ + return(0); + +read_ahead: + /* Read all the suitable blocks within the area */ + + if (inside_ibuf) { + ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; + } else { + ibuf_mode = BUF_READ_ANY_PAGE; + } + + count = 0; + + for (i = low; i < high; i++) { + /* It is only sensible to do read-ahead in the non-sync aio + mode: hence FALSE as the first parameter */ + + if (!ibuf_bitmap_page(zip_size, i)) { + count += buf_read_page_low( + &err, FALSE, + ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, + space, zip_size, FALSE, + tablespace_version, i); + if (err == DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: in random" + " readahead trying to access\n" + "InnoDB: tablespace %lu page %lu,\n" + "InnoDB: but the tablespace does not" + " exist or is just being dropped.\n", + (ulong) space, (ulong) i); + } + } + } + + /* In simulated aio we wake the aio handler threads only after + queuing all aio requests, in native aio the following call does + nothing: */ + + os_aio_simulated_wake_handler_threads(); + +#ifdef UNIV_DEBUG + if (buf_debug_prints && (count > 0)) { + fprintf(stderr, + "Random read-ahead space %lu offset %lu pages %lu\n", + (ulong) space, (ulong) offset, + (ulong) count); + } +#endif /* UNIV_DEBUG */ + + /* Read ahead is considered one I/O operation for the purpose of + LRU policy decision. */ + buf_LRU_stat_inc_io(); + + buf_pool->stat.n_ra_pages_read_rnd += count; + srv_buf_pool_reads += count; + return(count); +} + /********************************************************************//** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets @@ -257,7 +424,7 @@ buf_read_ahead_linear( ulint err; ulint i; const ulint buf_read_ahead_linear_area - = BUF_READ_AHEAD_LINEAR_AREA(buf_pool); + = BUF_READ_AHEAD_AREA(buf_pool); ulint threshold; if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c4d2226227e..aaff354c89d 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -614,6 +614,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG}, {"buffer_pool_pages_total", (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG}, + {"buffer_pool_read_ahead_rnd", + (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG}, {"buffer_pool_read_ahead", (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG}, {"buffer_pool_read_ahead_evicted", @@ -11317,6 +11319,11 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, NULL, NULL, 0, 0, 1, 0); #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, + PLUGIN_VAR_NOCMDARG, + "Whether to use read ahead for random access within an extent.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG, "Number of pages that must be accessed sequentially for InnoDB to " @@ -11385,6 +11392,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG MYSQL_SYSVAR(change_buffering_debug), #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + MYSQL_SYSVAR(random_read_ahead), MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(io_capacity), MYSQL_SYSVAR(purge_threads), diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index b7621c679f0..98931067850 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -147,6 +147,8 @@ struct buf_pool_info_struct{ ulint n_pages_created; /*!< buf_pool->n_pages_created */ ulint n_pages_written; /*!< buf_pool->n_pages_written */ ulint n_page_gets; /*!< buf_pool->n_page_gets */ + ulint n_ra_pages_read_rnd; /*!< buf_pool->n_ra_pages_read_rnd, + number of pages readahead */ ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number of pages readahead */ ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted, @@ -171,6 +173,8 @@ struct buf_pool_info_struct{ last printout */ /* Statistics about read ahead algorithm. */ + double pages_readahead_rnd_rate;/*!< random readahead rate in pages per + second */ double pages_readahead_rate; /*!< readahead rate in pages per second */ double pages_evicted_rate; /*!< rate of readahead page evicted @@ -542,6 +546,18 @@ buf_block_get_freed_page_clock( __attribute__((pure)); /********************************************************************//** +Tells if a block is still close enough to the MRU end of the LRU list +meaning that it is not in danger of getting evicted and also implying +that it has been accessed recently. +Note that this is for heuristics only and does not reserve buffer pool +mutex. +@return TRUE if block is close to MRU end of LRU */ +UNIV_INLINE +ibool +buf_page_peek_if_young( +/*===================*/ + const buf_page_t* bpage); /*!< in: block */ +/********************************************************************//** Recommends a move of a block to the start of the LRU list if there is danger of dropping from the buffer pool. NOTE: does not reserve the buffer pool mutex. @@ -1605,6 +1621,8 @@ struct buf_pool_stat_struct{ ulint n_pages_written;/*!< number write operations */ ulint n_pages_created;/*!< number of pages created in the pool with no read */ + ulint n_ra_pages_read_rnd;/*!< number of pages read in + as part of random read ahead */ ulint n_ra_pages_read;/*!< number of pages read in as part of read ahead */ ulint n_ra_pages_evicted;/*!< number of read ahead @@ -1744,7 +1762,7 @@ struct buf_pool_struct{ UT_LIST_BASE_NODE_T(buf_page_t) LRU; /*!< base node of the LRU list */ buf_page_t* LRU_old; /*!< pointer to the about - buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV + LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV oldest blocks in the LRU list; NULL if LRU length less than BUF_LRU_OLD_MIN_LEN; diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 4fdaf6ff43e..0e80ce55e57 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -124,6 +124,29 @@ buf_block_get_freed_page_clock( return(buf_page_get_freed_page_clock(&block->page)); } +/********************************************************************//** +Tells if a block is still close enough to the MRU end of the LRU list +meaning that it is not in danger of getting evicted and also implying +that it has been accessed recently. +Note that this is for heuristics only and does not reserve buffer pool +mutex. +@return TRUE if block is close to MRU end of LRU */ +UNIV_INLINE +ibool +buf_page_peek_if_young( +/*===================*/ + const buf_page_t* bpage) /*!< in: block */ +{ + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + + /* FIXME: bpage->freed_page_clock is 31 bits */ + return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) + < ((ulint) bpage->freed_page_clock + + (buf_pool->curr_size + * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio) + / (BUF_LRU_OLD_RATIO_DIV * 4)))); +} + /********************************************************************//** Recommends a move of a block to the start of the LRU list if there is danger of dropping from the buffer pool. NOTE: does not reserve the buffer pool @@ -154,12 +177,7 @@ buf_page_peek_if_too_old( buf_pool->stat.n_pages_not_made_young++; return(FALSE); } else { - /* FIXME: bpage->freed_page_clock is 31 bits */ - return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) - > ((ulint) bpage->freed_page_clock - + (buf_pool->curr_size - * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio) - / (BUF_LRU_OLD_RATIO_DIV * 4)))); + return(!buf_page_peek_if_young(bpage)); } } diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index f42894a138c..74ef2d2dab7 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -184,7 +184,7 @@ buf_LRU_make_block_old( /*===================*/ buf_page_t* bpage); /*!< in: control block */ /**********************************************************************//** -Updates buf_LRU_old_ratio. +Updates buf_pool->LRU_old_ratio. @return updated old_pct */ UNIV_INTERN ulint @@ -193,7 +193,7 @@ buf_LRU_old_ratio_update( uint old_pct,/*!< in: Reserve this percentage of the buffer pool for "old" blocks. */ ibool adjust);/*!< in: TRUE=adjust the LRU list; - FALSE=just assign buf_LRU_old_ratio + FALSE=just assign buf_pool->LRU_old_ratio during the initialization of InnoDB */ /********************************************************************//** Update the historical stats that we are collecting for LRU eviction @@ -222,18 +222,15 @@ buf_LRU_print(void); #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ /** @name Heuristics for detecting index scan @{ */ -/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for -"old" blocks. Protected by buf_pool->mutex. */ -extern uint buf_LRU_old_ratio; -/** The denominator of buf_LRU_old_ratio. */ +/** The denominator of buf_pool->LRU_old_ratio. */ #define BUF_LRU_OLD_RATIO_DIV 1024 -/** Maximum value of buf_LRU_old_ratio. +/** Maximum value of buf_pool->LRU_old_ratio. @see buf_LRU_old_adjust_len -@see buf_LRU_old_ratio_update */ +@see buf_pool->LRU_old_ratio_update */ #define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV -/** Minimum value of buf_LRU_old_ratio. +/** Minimum value of buf_pool->LRU_old_ratio. @see buf_LRU_old_adjust_len -@see buf_LRU_old_ratio_update +@see buf_pool->LRU_old_ratio_update The minimum must exceed (BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */ #define BUF_LRU_OLD_RATIO_MIN 51 diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h index cdf6cdba3d1..cd5eff66ee8 100644 --- a/storage/innobase/include/buf0rea.h +++ b/storage/innobase/include/buf0rea.h @@ -43,6 +43,31 @@ buf_read_page( ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ ulint offset);/*!< in: page number */ /********************************************************************//** +Applies a random read-ahead in buf_pool if there are at least a threshold +value of accessed pages from the random read-ahead area. Does not read any +page, not even the one at the position (space, offset), if the read-ahead +mechanism is not activated. NOTE 1: the calling thread may own latches on +pages: to avoid deadlocks this function must be written such that it cannot +end up waiting for these latches! NOTE 2: the calling thread must want +access to the page given: this rule is set to prevent unintended read-aheads +performed by ibuf routines, a situation which could result in a deadlock if +the OS does not support asynchronous i/o. +@return number of page read requests issued; NOTE that if we read ibuf +pages, it may happen that the page at the given page number does not +get read even if we return a positive value! +@return number of page read requests issued */ +UNIV_INTERN +ulint +buf_read_ahead_random( +/*==================*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes, + or 0 */ + ulint offset, /*!< in: page number of a page which + the current thread wants to access */ + ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf + routine */ +/********************************************************************//** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. Does not read any page if the read-ahead mechanism is not activated. Note diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index fc9401a12f5..7a93548cb03 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -158,6 +158,7 @@ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; extern ulint srv_n_file_io_threads; +extern my_bool srv_random_read_ahead; extern ulong srv_read_ahead_threshold; extern ulint srv_n_read_io_threads; extern ulint srv_n_write_io_threads; @@ -703,6 +704,7 @@ struct export_var_struct{ ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */ ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */ + ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */ diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 04503d25cd4..c7427abdddd 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -222,6 +222,8 @@ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX; UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX; +/* Switch to enable random read ahead. */ +UNIV_INTERN my_bool srv_random_read_ahead = FALSE; /* User settable value of the number of pages that must be present in the buffer cache and accessed sequentially for InnoDB to trigger a readahead request. */ @@ -2032,6 +2034,8 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free; export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed; export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads; + export_vars.innodb_buffer_pool_read_ahead_rnd + = stat.n_ra_pages_read_rnd; export_vars.innodb_buffer_pool_read_ahead = stat.n_ra_pages_read; export_vars.innodb_buffer_pool_read_ahead_evicted