
MDEV-22871: Reduce InnoDB buf_pool.page_hash contention

The rw_lock_s_lock() calls for the buf_pool.page_hash became a
clear bottleneck after MDEV-15053 reduced the contention on
buf_pool.mutex. We will replace that use of rw_lock_t with a
special implementation that is optimized for memory bus traffic.

The hash_table_locks instrumentation will be removed.

buf_pool_t::page_hash: Use a special implementation whose API is
compatible with hash_table_t, and store the custom rw-locks
directly in buf_pool.page_hash.array, intentionally sharing
cache lines with the hash table pointers.
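
As an illustration of that layout (a sketch of the arithmetic only, based on
the ELEMENTS_PER_LATCH and pad() definitions in the page_hash_table hunk
further below): every 1024th hash_cell_t slot holds a latch, and the padded
index keeps each latch next to the bucket pointers it protects:

    /* illustrative sketch, not code added by this commit */
    static constexpr size_t ELEMENTS_PER_LATCH= 1023;
    static size_t pad(size_t h) { return 1 + h / ELEMENTS_PER_LATCH + h; }
    /* pad(0) == 1: the first payload cell follows the latch in array[0];
    pad(1023) == 1025, which belongs to the next latch in array[1024]. */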

rw_lock: A low-level rw-lock implementation based on std::atomic<uint32_t>
where read_trylock() becomes a simple fetch_add(1).
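
As a minimal sketch of that idea (an illustration only, with hypothetical
names; the contended paths that set the writer-waiting flag and spin are
omitted, so this is not the rw_lock class added by the commit):

    #include <atomic>
    #include <cstdint>

    class atomic_rw_lock
    {
      std::atomic<uint32_t> word{0};
      static constexpr uint32_t WRITER= 1U << 31;         /* exclusively locked */
      static constexpr uint32_t WRITER_WAITING= 1U << 30; /* set by the omitted
                                                          contended write path */
      static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;
    public:
      /* The read fast path is a single fetch_add(1): optimistically register
      as a reader, then back out if a writer is present or pending. */
      bool read_trylock()
      {
        if (!(word.fetch_add(1, std::memory_order_acquire) & WRITER_PENDING))
          return true;
        word.fetch_sub(1, std::memory_order_relaxed);
        return false;
      }
      void read_unlock() { word.fetch_sub(1, std::memory_order_release); }

      /* A writer can only start when no readers or writers are registered. */
      bool write_trylock()
      {
        uint32_t expected= 0;
        return word.compare_exchange_strong(expected, WRITER,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed);
      }
      void write_unlock() { word.fetch_and(~WRITER, std::memory_order_release); }
    };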

buf_pool_t::page_hash_latch: The specialization of rw_lock used for buf_pool.page_hash.

buf_pool_t::page_hash_latch::read_lock(): Assert that buf_pool.mutex
is not being held by the caller.

buf_pool_t::page_hash_latch::write_lock() may be called while not holding
buf_pool.mutex. buf_pool_t::watch_set() is such a caller.

buf_pool_t::page_hash_latch::read_lock_wait(),
page_hash_latch::write_lock_wait(): The spin loops.
These will obey the global parameters innodb_sync_spin_loops
and innodb_spin_wait_delay.

buf_pool_t::freed_page_hash: A singly linked list of copies of
buf_pool.page_hash that ever existed. The fact that we never
free any buf_pool.page_hash.array guarantees that all
page_hash_latch that ever existed will remain valid until shutdown.

buf_pool_t::resize_hash(): Replaces buf_pool_resize_hash().
Prepend a shallow copy of the old page_hash to freed_page_hash.
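
Condensed, the linking step looks like this (restating the resize_hash()
hunk further below; array[1].node is free to reuse as a "next" pointer
because every bucket of the retired table has just been emptied):

    /* sketch of resize_hash(): retire the current page_hash */
    buf_pool.page_hash.array[1].node= freed_page_hash; /* link previous head */
    std::swap(buf_pool.page_hash, *new_page_hash);     /* install the new table */
    freed_page_hash= new_page_hash;                    /* old table heads the list */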

buf_pool_t::page_hash_table::n_cells: Declare as Atomic_relaxed.

buf_pool_t::page_hash_table::lock(): Explain what prevents a
race condition with buf_pool_t::resize_hash().
Author: Marko Mäkelä
Date:   2020-06-18 13:38:30 +03:00
parent  cfd3d70ccb
commit  5155a300fa
23 changed files with 478 additions and 327 deletions


@@ -17,7 +17,6 @@ wait/synch/sxlock/innodb/dict_table_stats
 wait/synch/sxlock/innodb/fil_space_latch
 wait/synch/sxlock/innodb/fts_cache_init_rw_lock
 wait/synch/sxlock/innodb/fts_cache_rw_lock
-wait/synch/sxlock/innodb/hash_table_locks
 wait/synch/sxlock/innodb/index_online_log
 wait/synch/sxlock/innodb/index_tree_rw_lock
 wait/synch/sxlock/innodb/trx_i_s_cache_lock


@@ -486,7 +486,6 @@ insert into test.sanity values
 ("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_OPTIMIZE_FULLTEXT_ONLY"),
 ("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_PAGE_CLEANERS"),
 ("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_PAGE_CLEANER_DISABLED_DEBUG"),
-("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_PAGE_HASH_LOCKS"),
 ("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_PAGE_SIZE"),
 ("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_PRINT_ALL_DEADLOCKS"),
 ("JUNK: GLOBAL-ONLY", "I_S.SESSION_VARIABLES", "INNODB_PURGE_BATCH_SIZE"),


@@ -1,24 +0,0 @@
-select @@global.innodb_page_hash_locks between 1 and 1024;
-@@global.innodb_page_hash_locks between 1 and 1024
-1
-select @@global.innodb_page_hash_locks;
-@@global.innodb_page_hash_locks
-64
-select @@session.innodb_page_hash_locks;
-ERROR HY000: Variable 'innodb_page_hash_locks' is a GLOBAL variable
-show global variables like 'innodb_page_hash_locks';
-Variable_name Value
-innodb_page_hash_locks 64
-show session variables like 'innodb_page_hash_locks';
-Variable_name Value
-innodb_page_hash_locks 64
-select * from information_schema.global_variables where variable_name='innodb_page_hash_locks';
-VARIABLE_NAME VARIABLE_VALUE
-INNODB_PAGE_HASH_LOCKS 64
-select * from information_schema.session_variables where variable_name='innodb_page_hash_locks';
-VARIABLE_NAME VARIABLE_VALUE
-INNODB_PAGE_HASH_LOCKS 64
-set global innodb_page_hash_locks=1;
-ERROR HY000: Variable 'innodb_page_hash_locks' is a read only variable
-set @@session.innodb_page_hash_locks='some';
-ERROR HY000: Variable 'innodb_page_hash_locks' is a read only variable


@@ -380,15 +380,6 @@
 VARIABLE_COMMENT Deprecated parameter with no effect.
 NUMERIC_MIN_VALUE 0
 NUMERIC_MAX_VALUE 64
-@@ -1513,7 +1513,7 @@
- SESSION_VALUE NULL
- DEFAULT_VALUE 16
- VARIABLE_SCOPE GLOBAL
--VARIABLE_TYPE BIGINT UNSIGNED
-+VARIABLE_TYPE INT UNSIGNED
- VARIABLE_COMMENT Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2
- NUMERIC_MIN_VALUE 1
- NUMERIC_MAX_VALUE 1024
@@ -1525,7 +1525,7 @@
 SESSION_VALUE NULL
 DEFAULT_VALUE 16384


@@ -1509,18 +1509,6 @@ NUMERIC_BLOCK_SIZE NULL
 ENUM_VALUE_LIST OFF,ON
 READ_ONLY NO
 COMMAND_LINE_ARGUMENT OPTIONAL
-VARIABLE_NAME INNODB_PAGE_HASH_LOCKS
-SESSION_VALUE NULL
-DEFAULT_VALUE 64
-VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BIGINT UNSIGNED
-VARIABLE_COMMENT Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2
-NUMERIC_MIN_VALUE 1
-NUMERIC_MAX_VALUE 1024
-NUMERIC_BLOCK_SIZE 0
-ENUM_VALUE_LIST NULL
-READ_ONLY YES
-COMMAND_LINE_ARGUMENT OPTIONAL
 VARIABLE_NAME INNODB_PAGE_SIZE
 SESSION_VALUE NULL
 DEFAULT_VALUE 16384


@@ -1,24 +0,0 @@
---source include/have_innodb.inc
---source include/have_debug.inc
-#
-# exists as global only
-#
-select @@global.innodb_page_hash_locks between 1 and 1024;
-select @@global.innodb_page_hash_locks;
---error ER_INCORRECT_GLOBAL_LOCAL_VAR
-select @@session.innodb_page_hash_locks;
-show global variables like 'innodb_page_hash_locks';
-show session variables like 'innodb_page_hash_locks';
---disable_warnings
-select * from information_schema.global_variables where variable_name='innodb_page_hash_locks';
-select * from information_schema.session_variables where variable_name='innodb_page_hash_locks';
---enable_warnings
-#
-# show that it's read-only
-#
---error ER_INCORRECT_GLOBAL_LOCAL_VAR
-set global innodb_page_hash_locks=1;
---error ER_INCORRECT_GLOBAL_LOCAL_VAR
-set @@session.innodb_page_hash_locks='some';


@@ -1085,15 +1085,15 @@ fail:
 buf_block_t* block = buf_pool.block_from_ahi(rec);
 if (!ahi_latch) {
-rw_lock_t* hash_lock = buf_pool.hash_lock_get(
+page_hash_latch* hash_lock = buf_pool.hash_lock_get(
 block->page.id());
-rw_lock_s_lock(hash_lock);
+hash_lock->read_lock();
 if (block->page.state() == BUF_BLOCK_REMOVE_HASH) {
 /* Another thread is just freeing the block
 from the LRU list of the buffer pool: do not
 try to access this page. */
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 goto fail;
 }
@@ -1104,7 +1104,7 @@ fail:
 DBUG_ASSERT(fail || block->page.status != buf_page_t::FREED);
 buf_block_buf_fix_inc(block, __FILE__, __LINE__);
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 block->page.set_accessed();
 buf_page_make_young_if_needed(&block->page);


@@ -555,8 +555,8 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
 return false;
 }
-rw_lock_t * hash_lock = buf_pool.hash_lock_get_low(fold);
-rw_lock_x_lock(hash_lock);
+page_hash_latch *hash_lock = buf_pool.page_hash.lock_get(fold);
+hash_lock->write_lock();
 if (bpage->can_relocate()) {
 /* Relocate the compressed page. */
@@ -567,7 +567,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
 memcpy(dst, src, size);
 bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 buf_buddy_mem_invalid(
 reinterpret_cast<buf_buddy_free_t*>(src), i);
@@ -578,7 +578,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
 return(true);
 }
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 return(false);
 }


@@ -40,6 +40,7 @@ Created 11/5/1995 Heikki Tuuri
 #include <string.h>
 #ifndef UNIV_INNOCHECKSUM
+#include "my_cpu.h"
 #include "mem0mem.h"
 #include "btr0btr.h"
 #include "fil0fil.h"
@@ -278,6 +279,47 @@ the read requests for the whole area.
 */
 #ifndef UNIV_INNOCHECKSUM
+void page_hash_latch::read_lock_wait()
+{
+  auto l= read_lock_yield();
+  /* First, try busy spinning for a while. */
+  for (auto spin= srv_n_spin_wait_rounds; spin--; )
+  {
+    if (l & WRITER_PENDING)
+      ut_delay(srv_spin_wait_delay);
+    if (read_trylock())
+      return;
+    l= read_lock_yield();
+  }
+  /* Fall back to yielding to other threads. */
+  for (;;)
+  {
+    if (l & WRITER_PENDING)
+      os_thread_yield();
+    if (read_trylock())
+      return;
+    l= read_lock_yield();
+  }
+}
+void page_hash_latch::write_lock_wait()
+{
+  write_lock_wait_start();
+  /* First, try busy spinning for a while. */
+  for (auto spin= srv_n_spin_wait_rounds; spin--; )
+  {
+    if (write_lock_poll())
+      return;
+    ut_delay(srv_spin_wait_delay);
+  }
+  /* Fall back to yielding to other threads. */
+  do
+    os_thread_yield();
+  while (!write_lock_poll());
+}
 /** Value in microseconds */
 constexpr int WAIT_FOR_READ= 100;
 constexpr int WAIT_FOR_WRITE= 100;
@@ -1441,6 +1483,15 @@ static void buf_block_free_mutexes(buf_block_t* block)
 ut_d(ut_free(block->debug_latch));
 }
+/** Create the hash table.
+@param n  the lower bound of n_cells */
+void buf_pool_t::page_hash_table::create(ulint n)
+{
+  n_cells= ut_find_prime(n);
+  array= static_cast<hash_cell_t*>
+    (ut_zalloc_nokey(pad(n_cells) * sizeof *array));
+}
 /** Create the buffer pool.
 @return whether the creation failed */
 bool buf_pool_t::create()
@@ -1517,16 +1568,7 @@ bool buf_pool_t::create()
 n_chunks_new= n_chunks;
-/* Number of locks protecting page_hash must be a power of two */
-srv_n_page_hash_locks= my_round_up_to_next_power(static_cast<uint32_t>
-(srv_n_page_hash_locks));
-ut_a(srv_n_page_hash_locks != 0);
-ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS);
 page_hash.create(2 * curr_size);
-for (auto i= srv_n_page_hash_locks; i--; )
-rw_lock_create(hash_table_locks_key, &page_hash_latches[i],
-SYNC_BUF_PAGE_HASH);
 zip_hash.create(2 * curr_size);
 last_printout_time= time(NULL);
@@ -1604,9 +1646,14 @@ void buf_pool_t::close()
 ut_free(chunks);
 chunks= nullptr;
-for (auto i= srv_n_page_hash_locks; i--; )
-rw_lock_free(&page_hash_latches[i]);
 page_hash.free();
+  while (page_hash_table *old_page_hash= freed_page_hash)
+  {
+    freed_page_hash= static_cast<page_hash_table*>
+      (old_page_hash->array[1].node);
+    old_page_hash->free();
+    UT_DELETE(old_page_hash);
+  }
 zip_hash.free();
 io_buf.close();
@@ -1632,8 +1679,8 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 }
 const page_id_t id(block->page.id());
-rw_lock_t* hash_lock = hash_lock_get(id);
-rw_lock_x_lock(hash_lock);
+page_hash_latch* hash_lock = hash_lock_get(id);
+hash_lock->write_lock();
 if (block->page.can_relocate()) {
 memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
@@ -1722,13 +1769,13 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 ut_ad(new_block->lock_hash_val == lock_rec_hash(
 id.space(), id.page_no()));
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 /* free block */
 ut_d(block->page.set_state(BUF_BLOCK_MEMORY));
 buf_LRU_block_free_non_file_page(block);
 } else {
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 buf_LRU_block_free_non_file_page(new_block);
 }
@@ -1922,30 +1969,39 @@ inline bool buf_pool_t::withdraw_blocks()
 }
 /** resize page_hash and zip_hash */
-static void buf_pool_resize_hash()
+inline void buf_pool_t::resize_hash()
 {
-hash_table_t new_hash;
-new_hash.create(2 * buf_pool.curr_size);
-for (ulint i= 0; i < buf_pool.page_hash.n_cells; i++)
+page_hash_table *new_page_hash= UT_NEW_NOKEY(page_hash_table());
+new_page_hash->create(2 * buf_pool.curr_size);
+new_page_hash->write_lock_all();
+for (auto i= page_hash.pad(page_hash.n_cells); i--; )
 {
-while (buf_page_t *bpage= static_cast<buf_page_t*>
-(HASH_GET_FIRST(&buf_pool.page_hash, i)))
+static_assert(!((page_hash_table::ELEMENTS_PER_LATCH + 1) &
+page_hash_table::ELEMENTS_PER_LATCH),
+"must be one less than a power of 2");
+if (!(i & page_hash_table::ELEMENTS_PER_LATCH))
+{
+ut_ad(reinterpret_cast<page_hash_latch*>
+(&page_hash.array[i])->is_write_locked());
+continue;
+}
+while (buf_page_t *bpage= static_cast<buf_page_t*>
+(page_hash.array[i].node))
 {
-buf_page_t *prev_bpage= bpage;
 ut_ad(bpage->in_page_hash);
-bpage= static_cast<buf_page_t*>(HASH_GET_NEXT(hash, prev_bpage));
-const ulint fold= prev_bpage->id().fold();
-HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, fold, prev_bpage);
-HASH_INSERT(buf_page_t, hash, &new_hash, fold, prev_bpage);
+const ulint fold= bpage->id().fold();
+HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
+HASH_INSERT(buf_page_t, hash, new_page_hash, fold, bpage);
 }
 }
-std::swap(buf_pool.page_hash.array, new_hash.array);
-buf_pool.page_hash.n_cells= new_hash.n_cells;
-new_hash.free();
+buf_pool.page_hash.array[1].node= freed_page_hash;
+std::swap(buf_pool.page_hash, *new_page_hash);
+freed_page_hash= new_page_hash;
 /* recreate zip_hash */
+hash_table_t new_hash;
 new_hash.create(2 * buf_pool.curr_size);
 for (ulint i= 0; i < buf_pool.zip_hash.n_cells; i++)
@@ -1953,11 +2009,9 @@ static void buf_pool_resize_hash()
 while (buf_page_t *bpage= static_cast<buf_page_t*>
 (HASH_GET_FIRST(&buf_pool.zip_hash, i)))
 {
-buf_page_t *prev_bpage= bpage;
-bpage= static_cast<buf_page_t*>(HASH_GET_NEXT(hash, prev_bpage));
-const ulint fold= BUF_POOL_ZIP_FOLD_BPAGE(prev_bpage);
-HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, prev_bpage);
-HASH_INSERT(buf_page_t, hash, &new_hash, fold, prev_bpage);
+const ulint fold= BUF_POOL_ZIP_FOLD_BPAGE(bpage);
+HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, bpage);
+HASH_INSERT(buf_page_t, hash, &new_hash, fold, bpage);
 }
 }
@@ -1967,6 +2021,49 @@ static void buf_pool_resize_hash()
 }
+inline void buf_pool_t::page_hash_table::write_lock_all()
+{
+  for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1)
+  {
+    reinterpret_cast<page_hash_latch&>(array[n]).write_lock();
+    if (!n)
+      break;
+  }
+}
+inline void buf_pool_t::page_hash_table::write_unlock_all()
+{
+  for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1)
+  {
+    reinterpret_cast<page_hash_latch&>(array[n]).write_unlock();
+    if (!n)
+      break;
+  }
+}
+inline void buf_pool_t::write_lock_all_page_hash()
+{
+  ut_ad(mutex_own(&mutex));
+  page_hash.write_lock_all();
+  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
+       old_page_hash= static_cast<page_hash_table*>
+       (old_page_hash->array[1].node))
+    old_page_hash->write_lock_all();
+}
+inline void buf_pool_t::write_unlock_all_page_hash()
+{
+  page_hash.write_unlock_all();
+  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
+       old_page_hash= static_cast<page_hash_table*>
+       (old_page_hash->array[1].node))
+    old_page_hash->write_unlock_all();
+}
 /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */
 inline void buf_pool_t::resize()
 {
@@ -2131,8 +2228,7 @@ withdraw_retry:
 resizing.store(true, std::memory_order_relaxed);
 mutex_enter(&mutex);
-for (auto i= srv_n_page_hash_locks; i--; )
-rw_lock_x_lock(&page_hash_latches[i]);
+write_lock_all_page_hash();
 chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map());
@@ -2278,13 +2374,12 @@ calc_buf_pool_size:
 if the new size is too different */
 if (!warning && new_size_too_diff) {
 buf_resize_status("Resizing hash table");
-buf_pool_resize_hash();
+resize_hash();
 ib::info() << "hash tables were resized";
 }
 mutex_exit(&mutex);
-for (auto i= srv_n_page_hash_locks; i--; )
-rw_lock_x_unlock(&page_hash_latches[i]);
+write_unlock_all_page_hash();
 UT_DELETE(chunk_map_old);
@@ -2390,7 +2485,7 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
 const ulint fold= bpage->id().fold();
 ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
 ut_ad(mutex_own(&buf_pool.mutex));
-ut_ad(rw_lock_own(buf_pool.hash_lock_get(bpage->id()), RW_LOCK_X));
+ut_ad(buf_pool.hash_lock_get(bpage->id())->is_write_locked());
 ut_a(bpage->io_fix() == BUF_IO_NONE);
 ut_a(!bpage->buf_fix_count());
 ut_ad(bpage == buf_pool.page_hash_get_low(bpage->id(), fold));
@@ -2443,11 +2538,11 @@ relocated, and reacquired.
 @return a buffer pool block corresponding to id
 @retval nullptr if the block was not present, and a watch was installed */
 inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
-rw_lock_t **hash_lock)
+page_hash_latch **hash_lock)
 {
 const ulint fold= id.fold();
-ut_ad(*hash_lock == hash_lock_get_low(fold));
-ut_ad(rw_lock_own(*hash_lock, RW_LOCK_X));
+ut_ad(*hash_lock == page_hash.lock_get(fold));
+ut_ad((*hash_lock)->is_write_locked());
 retry:
 if (buf_page_t *bpage= page_hash_get_low(id, fold))
@@ -2460,7 +2555,7 @@ retry:
 return nullptr;
 }
-rw_lock_x_unlock(*hash_lock);
+(*hash_lock)->write_unlock();
 /* Allocate a watch[] and then try to insert it into the page_hash. */
 mutex_enter(&mutex);
@@ -2484,18 +2579,18 @@ retry:
 w->set_state(BUF_BLOCK_ZIP_PAGE);
 w->id_= id;
-*hash_lock= hash_lock_get_low(fold);
-rw_lock_x_lock(*hash_lock);
+*hash_lock= page_hash.lock_get(fold);
+(*hash_lock)->write_lock();
 mutex_exit(&mutex);
 buf_page_t *bpage= page_hash_get_low(id, fold);
 if (UNIV_LIKELY_NULL(bpage))
 {
-rw_lock_x_unlock(*hash_lock);
+(*hash_lock)->write_unlock();
 mutex_enter(&mutex);
 w->set_state(BUF_BLOCK_NOT_USED);
-*hash_lock= hash_lock_get_low(fold);
-rw_lock_x_lock(*hash_lock);
+*hash_lock= page_hash.lock_get(fold);
+(*hash_lock)->write_lock();
 mutex_exit(&mutex);
 goto retry;
 }
@@ -2533,7 +2628,7 @@ void buf_page_free(const page_id_t page_id,
 buf_pool.stat.n_page_gets++;
 const ulint fold= page_id.fold();
-rw_lock_t *hash_lock= buf_pool.page_hash_lock<false>(fold);
+page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
 buf_block_t *block= reinterpret_cast<buf_block_t*>
 (buf_pool.page_hash_get_low(page_id, fold));
@@ -2544,7 +2639,7 @@ void buf_page_free(const page_id_t page_id,
 {
 /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE,
 but avoid buf_zip_decompress() */
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 return;
 }
@@ -2559,7 +2654,7 @@ void buf_page_free(const page_id_t page_id,
 block->page.status= buf_page_t::FREED;
 buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 }
 /** Get read access to a compressed page (usually of type
@@ -2581,7 +2676,7 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
 bool discard_attempted= false;
 const ulint fold= page_id.fold();
 buf_page_t *bpage;
-rw_lock_t *hash_lock;
+page_hash_latch *hash_lock;
 for (;;)
 {
@@ -2604,13 +2699,13 @@ lookup:
 #endif /* UNIV_DEBUG */
 }
-ut_ad(rw_lock_own(hash_lock, RW_LOCK_S));
+ut_ad(hash_lock->is_read_locked());
 if (!bpage->zip.data)
 {
 /* There is no compressed page. */
 err_exit:
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 return nullptr;
 }
@@ -2625,7 +2720,7 @@ err_exit:
 if (!discard_attempted)
 {
 discard_attempted= true;
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 mutex_enter(&buf_pool.mutex);
 if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold))
 buf_LRU_free_page(bpage, false);
@@ -2645,7 +2740,7 @@ err_exit:
 got_block:
 bool must_read= bpage->io_fix() == BUF_IO_READ;
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 DBUG_ASSERT(bpage->status != buf_page_t::FREED);
@@ -2981,7 +3076,7 @@ loop:
 buf_block_t* fix_block;
 block = guess;
-rw_lock_t* hash_lock = buf_pool.page_hash_lock<false>(fold);
+page_hash_latch* hash_lock = buf_pool.page_hash.lock<false>(fold);
 if (block) {
@@ -3006,14 +3101,14 @@ lookup:
 }
 if (!block || buf_pool.watch_is_sentinel(block->page)) {
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 block = nullptr;
 }
 if (UNIV_UNLIKELY(!block)) {
 /* Page not in buf_pool: needs to be read from file */
 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
-hash_lock = buf_pool.page_hash_lock<true>(fold);
+hash_lock = buf_pool.page_hash.lock<true>(fold);
 if (buf_page_t *bpage= buf_pool.watch_set(
 page_id, &hash_lock)) {
@@ -3021,13 +3116,13 @@ lookup:
 increment the fix count to make
 sure that no state change takes place. */
 bpage->fix();
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 block = reinterpret_cast<buf_block_t*>(bpage);
 fix_block = block;
 goto got_block;
 }
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 }
 switch (mode) {
@@ -3121,7 +3216,7 @@ lookup:
 }
 fix_block->fix();
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 got_block:
 switch (mode) {
@@ -3212,9 +3307,9 @@ evict_from_pool:
 buf_block_init_low(block);
 mutex_enter(&buf_pool.mutex);
-hash_lock = buf_pool.hash_lock_get_low(fold);
-rw_lock_x_lock(hash_lock);
+hash_lock = buf_pool.page_hash.lock_get(fold);
+hash_lock->write_lock();
 /* Buffer-fixing prevents the page_hash from changing. */
 ut_ad(bpage == buf_pool.page_hash_get_low(page_id, fold));
@@ -3228,7 +3323,7 @@ evict_from_pool:
 This should be extremely unlikely, for example,
 if buf_page_get_zip() was invoked. */
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 buf_LRU_block_free_non_file_page(block);
 mutex_exit(&buf_pool.mutex);
@@ -3276,7 +3371,7 @@ evict_from_pool:
 UNIV_MEM_INVALID(bpage, sizeof *bpage);
 mutex_exit(&buf_pool.mutex);
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 buf_pool.n_pend_unzip++;
 access_time = block->page.is_accessed();
@@ -3312,9 +3407,6 @@ evict_from_pool:
 ut_ad(block == fix_block);
 ut_ad(fix_block->page.buf_fix_count());
-ut_ad(!rw_lock_own_flagged(hash_lock,
-RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
 ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE);
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
@@ -3336,8 +3428,8 @@ evict_from_pool:
 if (buf_LRU_free_page(&fix_block->page, true)) {
 space->release_for_io();
-hash_lock = buf_pool.hash_lock_get_low(fold);
-rw_lock_x_lock(hash_lock);
+hash_lock = buf_pool.page_hash.lock_get(fold);
+hash_lock->write_lock();
 mutex_exit(&buf_pool.mutex);
 /* We may set the watch, as it would have
 been set if the page were not in the
@@ -3346,7 +3438,7 @@ evict_from_pool:
 mode == BUF_GET_IF_IN_POOL_OR_WATCH
 ? buf_pool.watch_set(page_id, &hash_lock)
 : buf_pool.page_hash_get_low(page_id, fold));
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 if (block != NULL) {
 /* Either the page has been read in or
@@ -3467,9 +3559,6 @@ get_latch:
 buf_read_ahead_linear(page_id, zip_size, ibuf_inside(mtr));
 }
-ut_ad(!rw_lock_own_flagged(hash_lock,
-RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
 return(fix_block);
 }
@@ -3558,17 +3647,17 @@ buf_page_optimistic_get(
 return FALSE;
 }
-rw_lock_t *hash_lock = buf_pool.hash_lock_get(block->page.id());
-rw_lock_s_lock(hash_lock);
+page_hash_latch *hash_lock = buf_pool.hash_lock_get(block->page.id());
+hash_lock->read_lock();
 if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE
 || block->page.io_fix() != BUF_IO_NONE)) {
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 return(FALSE);
 }
 buf_block_buf_fix_inc(block, file, line);
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 const bool first_access = block->page.set_accessed();
@@ -3645,7 +3734,7 @@ buf_page_try_get_func(
 ut_ad(mtr);
 ut_ad(mtr->is_active());
-rw_lock_t *hash_lock;
+page_hash_latch *hash_lock;
 buf_page_t *bpage= buf_pool.page_hash_get_locked<false>(page_id,
 page_id.fold(),
 &hash_lock);
@@ -3653,13 +3742,13 @@ buf_page_try_get_func(
 return nullptr;
 if (bpage->state() != BUF_BLOCK_FILE_PAGE)
 {
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 return nullptr;
 }
 buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage);
 buf_block_buf_fix_inc(block, file, line);
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 mtr_memo_type_t fix_type= MTR_MEMO_PAGE_S_FIX;
 if (!rw_lock_s_lock_nowait(&block->lock, file, line))
@@ -3770,8 +3859,8 @@ buf_page_create(fil_space_t *space, uint32_t offset,
 /* The block must be put to the LRU list */
 buf_LRU_add_block(&block->page, false);
-rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
-rw_lock_x_lock(hash_lock);
+page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
+hash_lock->write_lock();
 block->page.set_state(BUF_BLOCK_FILE_PAGE);
 ut_d(block->page.in_page_hash= true);
 HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, &block->page);
@@ -3783,7 +3872,7 @@ buf_page_create(fil_space_t *space, uint32_t offset,
 the block. */
 block->page.set_io_fix(BUF_IO_READ);
 rw_lock_x_lock(&block->lock);
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 /* buf_pool.mutex may be released and reacquired by
 buf_buddy_alloc(). We must defer this operation until
@@ -3801,7 +3890,7 @@ buf_page_create(fil_space_t *space, uint32_t offset,
 rw_lock_x_unlock(&block->lock);
 }
 else
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 mutex_exit(&buf_pool.mutex);
@@ -3954,10 +4043,10 @@ static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space)
 void buf_pool_t::corrupted_evict(buf_page_t *bpage)
 {
 const page_id_t id(bpage->id());
-rw_lock_t *hash_lock= hash_lock_get(id);
+page_hash_latch *hash_lock= hash_lock_get(id);
 mutex_enter(&mutex);
-rw_lock_x_lock(hash_lock);
+hash_lock->write_lock();
 ut_ad(bpage->io_fix() == BUF_IO_READ);
 ut_ad(!bpage->oldest_modification());


@@ -141,7 +141,7 @@ caller needs to free the page to the free list
 @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
 this case the block is already returned to the buddy allocator. */
 static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
-rw_lock_t *hash_lock, bool zip);
+page_hash_latch *hash_lock, bool zip);
 /** Free a block to buf_pool */
 static void buf_LRU_block_free_hashed_page(buf_block_t *block)
@@ -1160,8 +1160,8 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
 bpage->can_relocate() from changing due to a concurrent
 execution of buf_page_get_low(). */
 const ulint fold = id.fold();
-rw_lock_t* hash_lock = buf_pool.hash_lock_get_low(fold);
-rw_lock_x_lock(hash_lock);
+page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
+hash_lock->write_lock();
 if (UNIV_UNLIKELY(!bpage->can_relocate())) {
 /* Do not free buffer fixed and I/O-fixed blocks. */
@@ -1178,7 +1178,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
 } else if (bpage->oldest_modification()
 && bpage->state() != BUF_BLOCK_FILE_PAGE) {
 func_exit:
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 return(false);
 } else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
@@ -1201,10 +1201,6 @@ func_exit:
 return(true);
 }
-/* buf_LRU_block_remove_hashed() releases the hash_lock */
-ut_ad(!rw_lock_own_flagged(hash_lock,
-RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
 /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != nullptr
 then it was a compressed page with an uncompressed frame and
 we are interested in freeing only the uncompressed frame.
@@ -1215,7 +1211,7 @@ func_exit:
 if (UNIV_LIKELY_NULL(b)) {
 buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
-rw_lock_x_lock(hash_lock);
+hash_lock->write_lock();
 ut_ad(!buf_pool.page_hash_get_low(id, fold));
 ut_ad(b->zip_size());
@@ -1301,7 +1297,7 @@ func_exit:
 decompressing the block while we release
 hash_lock. */
 b->set_io_fix(BUF_IO_PIN);
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 }
 mutex_exit(&buf_pool.mutex);
@@ -1405,10 +1401,10 @@ caller needs to free the page to the free list
 @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
 this case the block is already returned to the buddy allocator. */
 static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
-rw_lock_t *hash_lock, bool zip)
+page_hash_latch *hash_lock, bool zip)
 {
 ut_ad(mutex_own(&buf_pool.mutex));
-ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
+ut_ad(hash_lock->is_write_locked());
 ut_a(bpage->io_fix() == BUF_IO_NONE);
 ut_a(!bpage->buf_fix_count());
@@ -1501,7 +1497,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
 #ifdef UNIV_DEBUG
 UT_LIST_REMOVE(buf_pool.zip_clean, bpage);
 #endif /* UNIV_DEBUG */
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 buf_pool_mutex_exit_forbid();
 buf_buddy_free(bpage->zip.data, bpage->zip_size());
@@ -1542,7 +1538,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
 and by the time we'll release it in the caller we'd
 have inserted the compressed only descriptor in the
 page_hash. */
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 if (zip && bpage->zip.data) {
 /* Free the compressed page. */
@@ -1578,20 +1574,15 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
 @param id page identifier
 @param hash_lock buf_pool.page_hash latch (will be released here) */
 void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
-rw_lock_t *hash_lock)
+page_hash_latch *hash_lock)
 {
 while (bpage->buf_fix_count())
-{
 /* Wait for other threads to release the fix count
 before releasing the bpage from LRU list. */
-ut_delay(1);
-}
+(void) LF_BACKOFF();
 if (buf_LRU_block_remove_hashed(bpage, id, hash_lock, true))
 buf_LRU_block_free_hashed_page(reinterpret_cast<buf_block_t*>(bpage));
-/* buf_LRU_block_remove_hashed() releases hash_lock */
-ut_ad(!rw_lock_own_flagged(hash_lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
 }
 /** Update buf_pool.LRU_old_ratio.


@@ -53,7 +53,7 @@ that the block has been replaced with the real block.
 @param watch sentinel */
 inline void buf_pool_t::watch_remove(buf_page_t *watch)
 {
-ut_ad(rw_lock_own(hash_lock_get(watch->id()), RW_LOCK_X));
+ut_ad(hash_lock_get(watch->id())->is_write_locked());
 ut_a(watch_is_sentinel(*watch));
 if (watch->buf_fix_count())
 {
@@ -125,14 +125,14 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
 /* We must acquire hash_lock this early to prevent
 a race condition with buf_pool_t::watch_remove() */
-rw_lock_t *hash_lock= buf_pool.hash_lock_get_low(fold);
-rw_lock_x_lock(hash_lock);
+page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
+hash_lock->write_lock();
 buf_page_t *hash_page= buf_pool.page_hash_get_low(page_id, fold);
 if (hash_page && !buf_pool.watch_is_sentinel(*hash_page))
 {
 /* The page is already in the buffer pool. */
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 if (block)
 {
 rw_lock_x_unlock_gen(&block->lock, BUF_IO_READ);
@@ -160,7 +160,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
 ut_ad(!block->page.in_page_hash);
 ut_d(block->page.in_page_hash= true);
 HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 /* The block must be put to the LRU list, to the old blocks */
 buf_LRU_add_block(bpage, true/* to old blocks */);
@@ -184,7 +184,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
 }
 else
 {
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 /* The compressed page must be allocated before the
 control block (bpage), in order to avoid the
@@ -193,7 +193,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
 bool lru= false;
 void *data= buf_buddy_alloc(zip_size, &lru);
-rw_lock_x_lock(hash_lock);
+hash_lock->write_lock();
 /* If buf_buddy_alloc() allocated storage from the LRU list,
 it released and reacquired buf_pool.mutex. Thus, we must
@@ -205,7 +205,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
 if (UNIV_UNLIKELY(hash_page && !buf_pool.watch_is_sentinel(*hash_page)))
 {
 /* The block was added by some other thread. */
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 buf_buddy_free(data, zip_size);
 goto func_exit;
 }
@@ -234,7 +234,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
 ut_d(bpage->in_page_hash= true);
 HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
 bpage->set_io_fix(BUF_IO_READ);
-rw_lock_x_unlock(hash_lock);
+hash_lock->write_unlock();
 /* The block must be put to the LRU list, to the old blocks.
 The zip size is already set into the page zip */
@@ -253,7 +253,6 @@ func_exit_no_mutex:
 if (mode == BUF_READ_IBUF_PAGES_ONLY)
 ibuf_mtr_commit(&mtr);
-ut_ad(!rw_lock_own_flagged(hash_lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
 ut_ad(!bpage || bpage->in_file());
 return bpage;
@@ -426,10 +425,10 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
 for (page_id_t i= low; i < high; ++i)
 {
 const ulint fold= i.fold();
-rw_lock_t *hash_lock= buf_pool.page_hash_lock<false>(fold);
-const buf_page_t* bpage= buf_pool.page_hash_get_low(i, fold);
+page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
+const buf_page_t *bpage= buf_pool.page_hash_get_low(i, fold);
 bool found= bpage && bpage->is_accessed() && buf_page_peek_if_young(bpage);
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 if (found && !--count)
 goto read_ahead;
 }
@@ -620,7 +619,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
 for (page_id_t i= low; i != high_1; ++i)
 {
 const ulint fold= i.fold();
-rw_lock_t *hash_lock= buf_pool.page_hash_lock<false>(fold);
+page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
 const buf_page_t* bpage= buf_pool.page_hash_get_low(i, fold);
 if (i == page_id)
 {
@@ -632,7 +631,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
 if (!bpage)
 {
 hard_fail:
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 space->release();
 return 0;
 }
@@ -673,7 +672,7 @@ hard_fail:
 else if (!bpage)
 {
 failed:
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 if (--count)
 continue;
 space->release();
@@ -694,7 +693,7 @@ failed:
 prev_accessed= accessed;
 if (fail)
 goto failed;
-rw_lock_s_unlock(hash_lock);
+hash_lock->read_unlock();
 }
 /* If we got this far, read-ahead can be sensible: do it */


@@ -590,8 +590,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
 PSI_RWLOCK_KEY(trx_purge_latch),
 PSI_RWLOCK_KEY(index_tree_rw_lock),
 PSI_RWLOCK_KEY(index_online_log),
-PSI_RWLOCK_KEY(dict_table_stats),
-PSI_RWLOCK_KEY(hash_table_locks)
+PSI_RWLOCK_KEY(dict_table_stats)
 };
 # endif /* UNIV_PFS_RWLOCK */
@@ -19500,11 +19499,6 @@ static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
 128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024);
 #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
-PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
-"Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2",
-NULL, NULL, 64, 1, MAX_PAGE_HASH_LOCKS, 0);
 static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
 "Number of pages reserved in doublewrite buffer for batch flushing",
@@ -20393,7 +20387,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
 MYSQL_SYSVAR(merge_threshold_set_all_debug),
 #endif /* UNIV_DEBUG */
 #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-MYSQL_SYSVAR(page_hash_locks),
 MYSQL_SYSVAR(doublewrite_batch_size),
 #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
 MYSQL_SYSVAR(status_output),


@@ -70,9 +70,6 @@ struct fil_addr_t;
#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */ #define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */
/* @} */ /* @} */
#define MAX_PAGE_HASH_LOCKS 1024 /*!< The maximum number of
page_hash locks */
# ifdef UNIV_DEBUG # ifdef UNIV_DEBUG
extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
buffer pool is not allowed. */ buffer pool is not allowed. */
@@ -1605,47 +1602,9 @@ public:
} }
/** Get the page_hash latch for a page */ /** Get the page_hash latch for a page */
rw_lock_t *hash_lock_get(const page_id_t id) const page_hash_latch *hash_lock_get(const page_id_t id) const
{ {
return hash_lock_get_low(id.fold()); return page_hash.lock_get(id.fold());
}
/** Get a page_hash latch. */
rw_lock_t *hash_lock_get_low(ulint fold) const
{
return page_hash_latches +
ut_2pow_remainder(page_hash.calc_hash(fold),
ulint{srv_n_page_hash_locks});
}
private:
/** Get a page_hash latch. */
rw_lock_t *hash_lock_get_low(ulint fold, ulint n_cells) const
{
return page_hash_latches +
ut_2pow_remainder(ut_hash_ulint(fold, n_cells),
ulint{srv_n_page_hash_locks});
}
public:
/** Acquire a page_hash bucket latch, tolerating concurrent resize()
@tparam exclusive whether the latch is to be acquired exclusively
@param fold hash bucket key */
template<bool exclusive> rw_lock_t *page_hash_lock(ulint fold)
{
for (;;)
{
auto n_cells= page_hash.n_cells;
rw_lock_t *latch= hash_lock_get_low(fold, n_cells);
if (exclusive)
rw_lock_x_lock(latch);
else
rw_lock_s_lock(latch);
if (UNIV_LIKELY(n_cells == page_hash.n_cells))
return latch;
if (exclusive)
rw_lock_x_unlock(latch);
else
rw_lock_s_unlock(latch);
}
} }
/** Look up a block descriptor. /** Look up a block descriptor.
@@ -1656,9 +1615,7 @@ public:
buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold) buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold)
{ {
ut_ad(id.fold() == fold); ut_ad(id.fold() == fold);
ut_ad(mutex_own(&mutex) || ut_ad(mutex_own(&mutex) || page_hash.lock_get(fold)->is_locked());
rw_lock_own_flagged(hash_lock_get_low(fold),
RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
buf_page_t *bpage; buf_page_t *bpage;
/* Look for the page in the hash table */ /* Look for the page in the hash table */
HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage, HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage,
@@ -1676,17 +1633,14 @@ private:
@retval nullptr if no block was found; !lock || !*lock will also hold */ @retval nullptr if no block was found; !lock || !*lock will also hold */
template<bool exclusive,bool watch> template<bool exclusive,bool watch>
buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold, buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
rw_lock_t **hash_lock) page_hash_latch **hash_lock)
{ {
ut_ad(hash_lock || !exclusive); ut_ad(hash_lock || !exclusive);
rw_lock_t *latch= page_hash_lock<exclusive>(fold); page_hash_latch *latch= page_hash.lock<exclusive>(fold);
buf_page_t *bpage= page_hash_get_low(page_id, fold); buf_page_t *bpage= page_hash_get_low(page_id, fold);
if (!bpage || watch_is_sentinel(*bpage)) if (!bpage || watch_is_sentinel(*bpage))
{ {
if (exclusive) latch->release<exclusive>();
rw_lock_x_unlock(latch);
else
rw_lock_s_unlock(latch);
if (hash_lock) if (hash_lock)
*hash_lock= nullptr; *hash_lock= nullptr;
return watch ? bpage : nullptr; return watch ? bpage : nullptr;
@@ -1697,10 +1651,8 @@ private:
if (hash_lock) if (hash_lock)
*hash_lock= latch; /* to be released by the caller */ *hash_lock= latch; /* to be released by the caller */
else if (exclusive)
rw_lock_x_unlock(latch);
else else
rw_lock_s_unlock(latch); latch->release<exclusive>();
return bpage; return bpage;
} }
public: public:
@@ -1713,7 +1665,7 @@ public:
@retval nullptr if no block was found; !lock || !*lock will also hold */ @retval nullptr if no block was found; !lock || !*lock will also hold */
template<bool exclusive> template<bool exclusive>
buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold, buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
rw_lock_t **hash_lock) page_hash_latch **hash_lock)
{ return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); } { return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); }
/** @return whether the buffer pool contains a page /** @return whether the buffer pool contains a page
@@ -1730,9 +1682,7 @@ public:
@return whether bpage a sentinel for a buffer pool watch */ @return whether bpage a sentinel for a buffer pool watch */
bool watch_is_sentinel(const buf_page_t &bpage) bool watch_is_sentinel(const buf_page_t &bpage)
{ {
ut_ad(mutex_own(&mutex) || ut_ad(mutex_own(&mutex) || hash_lock_get(bpage.id())->is_locked());
rw_lock_own_flagged(hash_lock_get(bpage.id()),
RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
ut_ad(bpage.in_file()); ut_ad(bpage.in_file());
if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)]) if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)])
@@ -1754,11 +1704,11 @@ public:
bool watch_occurred(const page_id_t id) bool watch_occurred(const page_id_t id)
{ {
const ulint fold= id.fold(); const ulint fold= id.fold();
rw_lock_t *hash_lock= page_hash_lock<false>(fold); page_hash_latch *hash_lock= page_hash.lock<false>(fold);
/* The page must exist because watch_set() increments buf_fix_count. */ /* The page must exist because watch_set() increments buf_fix_count. */
buf_page_t *bpage= page_hash_get_low(id, fold); buf_page_t *bpage= page_hash_get_low(id, fold);
const bool is_sentinel= watch_is_sentinel(*bpage); const bool is_sentinel= watch_is_sentinel(*bpage);
rw_lock_s_unlock(hash_lock); hash_lock->read_unlock();
return !is_sentinel; return !is_sentinel;
} }
@@ -1769,7 +1719,8 @@ public:
@param hash_lock exclusively held page_hash latch @param hash_lock exclusively held page_hash latch
@return a buffer pool block corresponding to id @return a buffer pool block corresponding to id
@retval nullptr if the block was not present, and a watch was installed */ @retval nullptr if the block was not present, and a watch was installed */
inline buf_page_t *watch_set(const page_id_t id, rw_lock_t **hash_lock); inline buf_page_t *watch_set(const page_id_t id,
page_hash_latch **hash_lock);
/** Stop watching whether a page has been read in. /** Stop watching whether a page has been read in.
watch_set(id) must have returned nullptr before. watch_set(id) must have returned nullptr before.
@@ -1777,7 +1728,7 @@ public:
void watch_unset(const page_id_t id) void watch_unset(const page_id_t id)
{ {
const ulint fold= id.fold(); const ulint fold= id.fold();
rw_lock_t *hash_lock= page_hash_lock<true>(fold); page_hash_latch *hash_lock= page_hash.lock<true>(fold);
/* The page must exist because watch_set() increments buf_fix_count. */ /* The page must exist because watch_set() increments buf_fix_count. */
buf_page_t *watch= page_hash_get_low(id, fold); buf_page_t *watch= page_hash_get_low(id, fold);
if (watch->unfix() == 0 && watch_is_sentinel(*watch)) if (watch->unfix() == 0 && watch_is_sentinel(*watch))
@@ -1786,7 +1737,7 @@ public:
ut_ad(watch->in_page_hash); ut_ad(watch->in_page_hash);
ut_d(watch->in_page_hash= false); ut_d(watch->in_page_hash= false);
HASH_DELETE(buf_page_t, hash, &page_hash, fold, watch); HASH_DELETE(buf_page_t, hash, &page_hash, fold, watch);
rw_lock_x_unlock(hash_lock); hash_lock->write_unlock();
// Now that the watch is detached from page_hash, release it to watch[]. // Now that the watch is detached from page_hash, release it to watch[].
mutex_enter(&mutex); mutex_enter(&mutex);
/* It is possible that watch_remove() already removed the watch. */ /* It is possible that watch_remove() already removed the watch. */
@@ -1799,7 +1750,7 @@ public:
mutex_exit(&mutex); mutex_exit(&mutex);
} }
else else
rw_lock_x_unlock(hash_lock); hash_lock->write_unlock();
} }
/** Remove the sentinel block for the watch before replacing it with a /** Remove the sentinel block for the watch before replacing it with a
@@ -1872,11 +1823,92 @@ public:
/** read-ahead request size in pages */ /** read-ahead request size in pages */
Atomic_counter<uint32_t> read_ahead_area; Atomic_counter<uint32_t> read_ahead_area;
/** Hash table with singly-linked overflow lists. @see hash_table_t */
struct page_hash_table
{
/** Number of array[] elements per page_hash_latch.
Must be one less than a power of 2. */
static constexpr size_t ELEMENTS_PER_LATCH= 1023;
/** number of payload elements in array[] */
Atomic_relaxed<ulint> n_cells;
/** the hash array, with pad(n_cells) elements */
hash_cell_t *array;
/** Create the hash table.
@param n the lower bound of n_cells */
void create(ulint n);
/** Free the hash table. */
void free() { ut_free(array); array= nullptr; }
/** @return the index of an array element */
ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); }
/** @return raw array index converted to padded index */
static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }
private:
/** @return the hash value before any ELEMENTS_PER_LATCH padding */
static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); }
/** @return the index of an array element */
static ulint calc_hash(ulint fold, ulint n_cells)
{
return pad(hash(fold, n_cells));
}
/** Get a page_hash latch. */
page_hash_latch *lock_get(ulint fold, ulint n) const
{
static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
"must be one less than a power of 2");
return reinterpret_cast<page_hash_latch*>
(&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]);
}
public:
/** Get a page_hash latch. */
page_hash_latch *lock_get(ulint fold) const
{ return lock_get(fold, n_cells); }
/** Acquire an array latch, tolerating concurrent buf_pool_t::resize()
@tparam exclusive whether the latch is to be acquired exclusively
@param fold hash bucket key */
template<bool exclusive> page_hash_latch *lock(ulint fold)
{
for (;;)
{
auto n= n_cells;
page_hash_latch *latch= lock_get(fold, n);
latch->acquire<exclusive>();
/* Our latch prevents n_cells from changing. */
if (UNIV_LIKELY(n == n_cells))
return latch;
/* Retry, because buf_pool_t::resize_hash() affected us. */
latch->release<exclusive>();
}
}
  /** Exclusively acquire all latches */
inline void write_lock_all();
/** Release all latches */
inline void write_unlock_all();
};
private:
/** Former page_hash that has been deleted during resize();
singly-linked list via freed_page_hash->array[1] */
page_hash_table *freed_page_hash;
/** Lock all page_hash, also freed_page_hash. */
inline void write_lock_all_page_hash();
/** Release all page_hash, also freed_page_hash. */
inline void write_unlock_all_page_hash();
/** Resize page_hash and zip_hash. */
inline void resize_hash();
public:
/** Hash table of file pages (buf_page_t::in_file() holds), /** Hash table of file pages (buf_page_t::in_file() holds),
indexed by page_id_t. Protected by both mutex and page_hash_latches[]. */ indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
hash_table_t page_hash; page_hash_table page_hash;
/** Latches protecting page_hash */
mutable rw_lock_t page_hash_latches[MAX_PAGE_HASH_LOCKS];
/** map of block->frame to buf_block_t blocks that belong /** map of block->frame to buf_block_t blocks that belong
to buf_buddy_alloc(); protected by buf_pool.mutex */ to buf_buddy_alloc(); protected by buf_pool.mutex */
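The pad() arithmetic above is what lets the latches live inside the hash array itself: element 0 of every 1024-element group is a page_hash_latch and the following 1023 elements are payload cells, so lock_get() only has to mask away the low ten bits of the padded index to reach the latch covering a cell. The standalone sketch below (editor's illustration, not part of the commit; a plain modulo stands in for ut_hash_ulint()) simply replays that index math:

#include <cassert>
#include <cstdio>

static constexpr size_t ELEMENTS_PER_LATCH= 1023; /* one less than a power of 2 */

/* one latch element precedes every group of 1023 payload cells */
static size_t pad(size_t h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }

int main()
{
  const size_t n_cells= 5000;                 /* hypothetical table size */
  for (size_t fold : {0, 1, 1022, 1023, 4999})
  {
    size_t h= fold % n_cells;                 /* stand-in for ut_hash_ulint() */
    size_t cell= pad(h);                      /* padded index of the payload cell */
    size_t latch= cell & ~ELEMENTS_PER_LATCH; /* first element of the 1024-group */
    assert(latch % (ELEMENTS_PER_LATCH + 1) == 0);
    assert(cell > latch);                     /* payload never lands on the latch */
    std::printf("fold=%zu cell=%zu latch=%zu\n", fold, cell, latch);
  }
  return 0;
}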
@@ -2103,6 +2135,19 @@ private:
/** The InnoDB buffer pool */ /** The InnoDB buffer pool */
extern buf_pool_t buf_pool; extern buf_pool_t buf_pool;
inline void page_hash_latch::read_lock()
{
ut_ad(!mutex_own(&buf_pool.mutex));
if (!read_trylock())
read_lock_wait();
}
inline void page_hash_latch::write_lock()
{
if (!write_trylock())
write_lock_wait();
}
inline void buf_page_t::add_buf_fix_count(uint32_t count) inline void buf_page_t::add_buf_fix_count(uint32_t count)
{ {
ut_ad(mutex_own(&buf_pool.mutex)); ut_ad(mutex_own(&buf_pool.mutex));
@@ -2129,15 +2174,15 @@ inline void buf_page_t::set_state(buf_page_state state)
if (!in_file()) break; if (!in_file()) break;
/* fall through */ /* fall through */
case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_FILE_PAGE:
ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X)); ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
break; break;
case BUF_BLOCK_NOT_USED: case BUF_BLOCK_NOT_USED:
if (!in_file()) break; if (!in_file()) break;
/* fall through */ /* fall through */
case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_PAGE:
ut_ad((this >= &buf_pool.watch[0] && ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() ||
this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) || (this >= &buf_pool.watch[0] &&
rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X)); this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]));
break; break;
} }
#endif #endif
@@ -2159,7 +2204,7 @@ inline void buf_page_t::set_corrupt_id()
break; break;
case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_FILE_PAGE:
ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X)); ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
break; break;
case BUF_BLOCK_NOT_USED: case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_MEMORY: case BUF_BLOCK_MEMORY:

View File

@@ -153,7 +153,7 @@ buf_LRU_stat_update();
@param id page identifier @param id page identifier
@param hash_lock buf_pool.page_hash latch (will be released here) */ @param hash_lock buf_pool.page_hash latch (will be released here) */
void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id, void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
rw_lock_t *hash_lock) page_hash_latch *hash_lock)
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG

View File

@@ -192,10 +192,43 @@ extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX];
#include "ut0mutex.h" #include "ut0mutex.h"
#include "sync0rw.h" #include "sync0rw.h"
#include "rw_lock.h"
typedef ib_mutex_t BufPoolMutex; typedef ib_mutex_t BufPoolMutex;
typedef ib_mutex_t FlushListMutex; typedef ib_mutex_t FlushListMutex;
typedef rw_lock_t BPageLock; typedef rw_lock_t BPageLock;
class page_hash_latch : public rw_lock
{
public:
/** Wait for a shared lock */
void read_lock_wait();
/** Wait for an exclusive lock */
void write_lock_wait();
/** Acquire a shared lock */
inline void read_lock();
/** Acquire an exclusive lock */
inline void write_lock();
/** Acquire a lock */
template<bool exclusive> void acquire()
{
if (exclusive)
write_lock();
else
read_lock();
}
/** Release a lock */
template<bool exclusive> void release()
{
if (exclusive)
write_unlock();
else
read_unlock();
}
};
#endif /* !UNIV_INNOCHECKSUM */ #endif /* !UNIV_INNOCHECKSUM */
#endif /* buf0types.h */ #endif /* buf0types.h */
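read_lock_wait() and write_lock_wait() are only declared here; their definitions live elsewhere in the commit (most likely buf0buf.cc) and follow InnoDB's usual spin-then-yield pattern driven by srv_n_spin_wait_rounds and srv_spin_wait_delay. One possible shape of those loops is sketched below (editor's reconstruction, not the committed code). The important detail is that a failed read_trylock() has already done its fetch_add(1), so the waiter has to drop that claim with the protected read_lock_yield() before backing off; otherwise a thread in write_lock_wait() could never observe the reader count draining to WRITER_WAITING.

/* Hypothetical sketch; the real definitions may differ in detail. */
#include "buf0types.h"
#include "srv0srv.h"   /* srv_n_spin_wait_rounds, srv_spin_wait_delay */
#include "os0thread.h" /* os_thread_yield() */
#include "ut0ut.h"     /* ut_delay() */

void page_hash_latch::read_lock_wait()
{
  /* The failed read_trylock() in read_lock() left the reader count
  incremented; undo that so a pending writer can make progress. */
  read_lock_yield();
  for (;;)
  {
    for (auto spin= srv_n_spin_wait_rounds; spin--; )
    {
      ut_delay(srv_spin_wait_delay);
      if (read_trylock())
        return;
      read_lock_yield();
    }
    os_thread_yield();
  }
}

void page_hash_latch::write_lock_wait()
{
  write_lock_wait_start();            /* announce WRITER_WAITING */
  for (;;)
  {
    for (auto spin= srv_n_spin_wait_rounds; spin--; )
    {
      ut_delay(srv_spin_wait_delay);
      if (write_lock_poll())          /* CAS WRITER_WAITING -> WRITER */
        return;
    }
    os_thread_yield();
  }
}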

View File

@@ -33,8 +33,6 @@ struct hash_cell_t{
}; };
typedef void* hash_node_t; typedef void* hash_node_t;
#define hash_calc_hash(FOLD, TABLE) (TABLE)->calc_hash(FOLD)
/*******************************************************************//** /*******************************************************************//**
Inserts a struct to a hash table. */ Inserts a struct to a hash table. */
@@ -145,7 +143,7 @@ Gets the next struct in a hash chain, NULL if none. */
Looks for a struct in a hash table. */ Looks for a struct in a hash table. */
#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\ #define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
{\ {\
(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\ (DATA) = (TYPE) HASH_GET_FIRST(TABLE, (TABLE)->calc_hash(FOLD)); \
HASH_ASSERT_VALID(DATA);\ HASH_ASSERT_VALID(DATA);\
\ \
while ((DATA) != NULL) {\ while ((DATA) != NULL) {\

View File

@@ -0,0 +1,106 @@
/*****************************************************************************
Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
#pragma once
#include <atomic>
#include "my_dbug.h"
/** Simple read-write lock based on std::atomic */
class rw_lock
{
/** The lock word */
std::atomic<uint32_t> lock;
protected:
/** Available lock */
static constexpr uint32_t UNLOCKED= 0;
/** Flag to indicate that write_lock() is being held */
static constexpr uint32_t WRITER= 1 << 31;
/** Flag to indicate that write_lock_wait() is pending */
static constexpr uint32_t WRITER_WAITING= 1 << 30;
/** Flag to indicate that write_lock() or write_lock_wait() is pending */
static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;
/** Yield a read lock request due to a conflict with a write lock.
@return the lock value */
uint32_t read_lock_yield()
{
uint32_t l= lock.fetch_sub(1, std::memory_order_relaxed);
DBUG_ASSERT(l & ~WRITER_PENDING);
return l;
}
/** Start waiting for an exclusive lock. */
void write_lock_wait_start()
{ lock.fetch_or(WRITER_WAITING, std::memory_order_relaxed); }
/** Wait for an exclusive lock.
@return whether the exclusive lock was acquired */
bool write_lock_poll()
{
auto l= WRITER_WAITING;
if (lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
std::memory_order_relaxed))
return true;
if (!(l & WRITER_WAITING))
/* write_lock() must have succeeded for another thread */
write_lock_wait_start();
return false;
}
public:
/** Default constructor */
rw_lock() : lock(UNLOCKED) {}
/** Release a shared lock */
void read_unlock()
{
IF_DBUG_ASSERT(auto l=,) lock.fetch_sub(1, std::memory_order_release);
DBUG_ASSERT(l & ~WRITER_PENDING); /* at least one read lock */
DBUG_ASSERT(!(l & WRITER)); /* no write lock must have existed */
}
/** Release an exclusive lock */
void write_unlock()
{
IF_DBUG_ASSERT(auto l=,) lock.fetch_sub(WRITER, std::memory_order_release);
DBUG_ASSERT(l & WRITER); /* the write lock must have existed */
}
/** Try to acquire a shared lock.
@return whether the lock was acquired */
bool read_trylock()
{ return !(lock.fetch_add(1, std::memory_order_acquire) & WRITER_PENDING); }
/** Try to acquire an exclusive lock.
@return whether the lock was acquired */
bool write_trylock()
{
auto l= UNLOCKED;
return lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
std::memory_order_relaxed);
}
/** @return whether an exclusive lock is being held by any thread */
bool is_write_locked() const
{ return !!(lock.load(std::memory_order_relaxed) & WRITER); }
/** @return whether a shared lock is being held by any thread */
bool is_read_locked() const
{
auto l= lock.load(std::memory_order_relaxed);
return (l & ~WRITER_PENDING) && !(l & WRITER);
}
/** @return whether any lock is being held by any thread */
bool is_locked() const
{ return (lock.load(std::memory_order_relaxed) & ~WRITER_WAITING) != 0; }
};
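The lock word packs everything into a single 32-bit atomic: bits 0..29 count granted (or speculatively requested) shared locks, bit 30 flags a waiting writer, and bit 31 the writer itself, which is why read_trylock() can be a single fetch_add(1). A small single-threaded exercise of the public API (editor's illustration, assuming it is built against this header inside the source tree) shows the transitions:

#include "rw_lock.h"
#include <cassert>

int main()
{
  rw_lock l;
  assert(!l.is_locked());

  assert(l.read_trylock());       /* word: 1 (one reader) */
  assert(l.is_read_locked());
  assert(!l.write_trylock());     /* writer needs the word to be exactly 0 */
  l.read_unlock();                /* word: 0 */

  assert(l.write_trylock());      /* word: WRITER */
  assert(l.is_write_locked());
  assert(!l.read_trylock());      /* refused, but the increment is recorded */
  l.write_unlock();               /* word: 1; the recorded request now counts */
  assert(l.is_read_locked());
  l.read_unlock();                /* word: 0 */
  assert(!l.is_locked());
  return 0;
}

In the real latch a refused reader would not sit on that recorded request: the spin code is expected to undo it with the protected read_lock_yield(), so that a waiting writer's compare-exchange from WRITER_WAITING can eventually succeed.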

View File

@@ -333,8 +333,6 @@ extern const ulint srv_buf_pool_min_size;
extern const ulint srv_buf_pool_def_size; extern const ulint srv_buf_pool_def_size;
/** Requested buffer pool chunk size */ /** Requested buffer pool chunk size */
extern ulong srv_buf_pool_chunk_unit; extern ulong srv_buf_pool_chunk_unit;
/** Number of locks to protect buf_pool.page_hash */
extern ulong srv_n_page_hash_locks;
/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/ /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
extern ulong srv_LRU_scan_depth; extern ulong srv_LRU_scan_depth;
/** Whether or not to flush neighbors of a block */ /** Whether or not to flush neighbors of a block */

View File

@@ -226,22 +226,8 @@ rw_lock_lock_word_decr(
caused by concurrent executions of caused by concurrent executions of
rw_lock_s_lock(). */ rw_lock_s_lock(). */
#if 1 /* FIXME: MDEV-22871 Spurious contention between rw_lock_s_lock() */ /* Note: unlike this implementation, rw_lock::read_lock()
allows concurrent calls without a spin loop */
/* When the number of concurrently executing threads
exceeds the number of available processor cores,
multiple buf_pool.page_hash S-latch requests would
conflict here, mostly in buf_page_get_low(). We should
implement a simpler rw-lock where the S-latch
acquisition would be a simple fetch_add(1) followed by
either an optional load() loop to wait for the X-latch
to be released, or a fetch_sub(1) and a retry.
For now, we work around the problem with a delay in
this loop. It helped a little on some systems and was
reducing performance on others. */
(void) LF_BACKOFF();
#endif
} }
/* A real conflict was detected. */ /* A real conflict was detected. */

View File

@@ -126,7 +126,6 @@ extern mysql_pfs_key_t index_tree_rw_lock_key;
extern mysql_pfs_key_t index_online_log_key; extern mysql_pfs_key_t index_online_log_key;
extern mysql_pfs_key_t dict_table_stats_key; extern mysql_pfs_key_t dict_table_stats_key;
extern mysql_pfs_key_t trx_sys_rw_lock_key; extern mysql_pfs_key_t trx_sys_rw_lock_key;
extern mysql_pfs_key_t hash_table_locks_key;
#endif /* UNIV_PFS_RWLOCK */ #endif /* UNIV_PFS_RWLOCK */
/** Prints info of the sync system. /** Prints info of the sync system.

View File

@@ -207,9 +207,6 @@ const ulint srv_buf_pool_min_size = 5 * 1024 * 1024;
const ulint srv_buf_pool_def_size = 128 * 1024 * 1024; const ulint srv_buf_pool_def_size = 128 * 1024 * 1024;
/** Requested buffer pool chunk size */ /** Requested buffer pool chunk size */
ulong srv_buf_pool_chunk_unit; ulong srv_buf_pool_chunk_unit;
/** innodb_page_hash_locks (a debug-only parameter);
number of locks to protect buf_pool.page_hash */
ulong srv_n_page_hash_locks = 64;
/** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */ /** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */
ulong srv_LRU_scan_depth; ulong srv_LRU_scan_depth;
/** innodb_flush_neighbors; whether or not to flush neighbors of a block */ /** innodb_flush_neighbors; whether or not to flush neighbors of a block */

View File

@@ -777,7 +777,7 @@ LatchDebug::check_order(
case SYNC_POOL: case SYNC_POOL:
case SYNC_POOL_MANAGER: case SYNC_POOL_MANAGER:
case SYNC_RECV_WRITER: case SYNC_RECV_WRITER:
case SYNC_BUF_PAGE_HASH:
basic_check(latches, level, level); basic_check(latches, level, level);
break; break;
@@ -825,14 +825,6 @@ LatchDebug::check_order(
basic_check(latches, level, level - 1); basic_check(latches, level, level - 1);
break; break;
case SYNC_BUF_PAGE_HASH:
/* Multiple page_hash locks are only allowed during
buf_pool.validate() and that is where buf_pool mutex is already
held. */
/* Fall through */
case SYNC_REC_LOCK: case SYNC_REC_LOCK:
if (find(latches, SYNC_LOCK_SYS) != 0) { if (find(latches, SYNC_LOCK_SYS) != 0) {
@@ -1453,9 +1445,6 @@ sync_latch_meta_init()
LATCH_ADD_RWLOCK(DICT_TABLE_STATS, SYNC_INDEX_TREE, LATCH_ADD_RWLOCK(DICT_TABLE_STATS, SYNC_INDEX_TREE,
dict_table_stats_key); dict_table_stats_key);
LATCH_ADD_RWLOCK(HASH_TABLE_RW_LOCK, SYNC_BUF_PAGE_HASH,
hash_table_locks_key);
LATCH_ADD_MUTEX(SYNC_DEBUG_MUTEX, SYNC_NO_ORDER_CHECK, LATCH_ADD_MUTEX(SYNC_DEBUG_MUTEX, SYNC_NO_ORDER_CHECK,
PFS_NOT_INSTRUMENTED); PFS_NOT_INSTRUMENTED);

View File

@@ -102,7 +102,6 @@ mysql_pfs_key_t buf_block_debug_latch_key;
# endif /* UNIV_DEBUG */ # endif /* UNIV_DEBUG */
mysql_pfs_key_t dict_operation_lock_key; mysql_pfs_key_t dict_operation_lock_key;
mysql_pfs_key_t dict_table_stats_key; mysql_pfs_key_t dict_table_stats_key;
mysql_pfs_key_t hash_table_locks_key;
mysql_pfs_key_t index_tree_rw_lock_key; mysql_pfs_key_t index_tree_rw_lock_key;
mysql_pfs_key_t index_online_log_key; mysql_pfs_key_t index_online_log_key;
mysql_pfs_key_t fil_space_latch_key; mysql_pfs_key_t fil_space_latch_key;