mirror of
https://github.com/MariaDB/server.git
synced 2025-08-31 22:22:30 +03:00
Bug 12356373 - PERFORMANCE REGRESSION FROM 5.1 TO 5.5 : GROUP BY:
The title of the bug is a little confusing. The actual fix is to reintroduce random readahead inside InnoDB with a dynamic, global switch innodb_random_read_ahead [default = off]. Approved by: Sunny Bains rb://696
This commit is contained in:
@@ -38,6 +38,14 @@ Created 11/5/1995 Heikki Tuuri
|
||||
#include "srv0start.h"
|
||||
#include "srv0srv.h"
|
||||
|
||||
/** The size in blocks of the area where the random read-ahead algorithm counts
|
||||
the accessed pages when deciding whether to read-ahead */
|
||||
#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
|
||||
|
||||
/** There must be at least this many pages in buf_pool in the area to start
|
||||
a random read-ahead */
|
||||
#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
|
||||
|
||||
/** The linear read-ahead area size */
|
||||
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
|
||||
|
||||
@@ -157,6 +165,165 @@ buf_read_page_low(
|
||||
return(1);
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Applies a random read-ahead in buf_pool if there are at least a threshold
|
||||
value of accessed pages from the random read-ahead area. Does not read any
|
||||
page, not even the one at the position (space, offset), if the read-ahead
|
||||
mechanism is not activated. NOTE 1: the calling thread may own latches on
|
||||
pages: to avoid deadlocks this function must be written such that it cannot
|
||||
end up waiting for these latches! NOTE 2: the calling thread must want
|
||||
access to the page given: this rule is set to prevent unintended read-aheads
|
||||
performed by ibuf routines, a situation which could result in a deadlock if
|
||||
the OS does not support asynchronous i/o.
|
||||
@return number of page read requests issued; NOTE that if we read ibuf
|
||||
pages, it may happen that the page at the given page number does not
|
||||
get read even if we return a positive value! */
|
||||
static
|
||||
ulint
|
||||
buf_read_ahead_random(
|
||||
/*==================*/
|
||||
ulint space, /*!< in: space id */
|
||||
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
|
||||
ulint offset) /*!< in: page number of a page which the current thread
|
||||
wants to access */
|
||||
{
|
||||
ib_int64_t tablespace_version;
|
||||
ulint recent_blocks = 0;
|
||||
ulint count;
|
||||
ulint ibuf_mode;
|
||||
ulint low, high;
|
||||
ulint err;
|
||||
ulint i;
|
||||
ulint buf_read_ahead_random_area;
|
||||
|
||||
if (!srv_random_read_ahead) {
|
||||
/* Disabled by user */
|
||||
return(0);
|
||||
}
|
||||
|
||||
if (srv_startup_is_before_trx_rollback_phase) {
|
||||
/* No read-ahead to avoid thread deadlocks */
|
||||
return(0);
|
||||
}
|
||||
|
||||
if (ibuf_bitmap_page(zip_size, offset)
|
||||
|| trx_sys_hdr_page(space, offset)) {
|
||||
|
||||
/* If it is an ibuf bitmap page or trx sys hdr, we do
|
||||
no read-ahead, as that could break the ibuf page access
|
||||
order */
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* Remember the tablespace version before we ask the tablespace size
|
||||
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
|
||||
do not try to read outside the bounds of the tablespace! */
|
||||
|
||||
tablespace_version = fil_space_get_version(space);
|
||||
|
||||
buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
|
||||
|
||||
low = (offset / buf_read_ahead_random_area)
|
||||
* buf_read_ahead_random_area;
|
||||
high = (offset / buf_read_ahead_random_area + 1)
|
||||
* buf_read_ahead_random_area;
|
||||
if (high > fil_space_get_size(space)) {
|
||||
|
||||
high = fil_space_get_size(space);
|
||||
}
|
||||
|
||||
buf_pool_mutex_enter();
|
||||
|
||||
if (buf_pool->n_pend_reads
|
||||
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
|
||||
buf_pool_mutex_exit();
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* Count how many blocks in the area have been recently accessed,
|
||||
that is, reside near the start of the LRU list. */
|
||||
|
||||
for (i = low; i < high; i++) {
|
||||
const buf_page_t* bpage = buf_page_hash_get(space, i);
|
||||
|
||||
if (bpage
|
||||
&& buf_page_is_accessed(bpage)
|
||||
&& buf_page_peek_if_young(bpage)) {
|
||||
|
||||
recent_blocks++;
|
||||
|
||||
if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
|
||||
|
||||
buf_pool_mutex_exit();
|
||||
goto read_ahead;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buf_pool_mutex_exit();
|
||||
/* Do nothing */
|
||||
return(0);
|
||||
|
||||
read_ahead:
|
||||
/* Read all the suitable blocks within the area */
|
||||
|
||||
if (ibuf_inside()) {
|
||||
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
|
||||
} else {
|
||||
ibuf_mode = BUF_READ_ANY_PAGE;
|
||||
}
|
||||
|
||||
count = 0;
|
||||
|
||||
for (i = low; i < high; i++) {
|
||||
/* It is only sensible to do read-ahead in the non-sync aio
|
||||
mode: hence FALSE as the first parameter */
|
||||
|
||||
if (!ibuf_bitmap_page(zip_size, i)) {
|
||||
count += buf_read_page_low(
|
||||
&err, FALSE,
|
||||
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
|
||||
space, zip_size, FALSE,
|
||||
tablespace_version, i);
|
||||
if (err == DB_TABLESPACE_DELETED) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Warning: in random"
|
||||
" readahead trying to access\n"
|
||||
"InnoDB: tablespace %lu page %lu,\n"
|
||||
"InnoDB: but the tablespace does not"
|
||||
" exist or is just being dropped.\n",
|
||||
(ulong) space, (ulong) i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* In simulated aio we wake the aio handler threads only after
|
||||
queuing all aio requests, in native aio the following call does
|
||||
nothing: */
|
||||
|
||||
os_aio_simulated_wake_handler_threads();
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
if (buf_debug_prints && (count > 0)) {
|
||||
fprintf(stderr,
|
||||
"Random read-ahead space %lu offset %lu pages %lu\n",
|
||||
(ulong) space, (ulong) offset,
|
||||
(ulong) count);
|
||||
}
|
||||
#endif /* UNIV_DEBUG */
|
||||
|
||||
/* Read ahead is considered one I/O operation for the purpose of
|
||||
LRU policy decision. */
|
||||
buf_LRU_stat_inc_io();
|
||||
|
||||
buf_pool->stat.n_ra_pages_read_rnd += count;
|
||||
return(count);
|
||||
}
|
||||
|
||||
|
||||
/********************************************************************//**
|
||||
High-level function which reads a page asynchronously from a file to the
|
||||
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
||||
@@ -175,6 +342,9 @@ buf_read_page(
|
||||
ulint count;
|
||||
ulint err;
|
||||
|
||||
count = buf_read_ahead_random(space, zip_size, offset);
|
||||
srv_buf_pool_reads += count;
|
||||
|
||||
tablespace_version = fil_space_get_version(space);
|
||||
|
||||
/* We do the i/o in the synchronous aio mode to save thread
|
||||
|
Reference in New Issue
Block a user