diff --git a/buf/buf0buf.c b/buf/buf0buf.c index 222acc015a6..99e365d6129 100644 --- a/buf/buf0buf.c +++ b/buf/buf0buf.c @@ -221,6 +221,9 @@ in the free list to the frames. 5) When we have AWE enabled, we disable adaptive hash indexes. */ +/* Value in microseconds */ +static const int WAIT_FOR_READ = 20000; + buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */ #ifdef UNIV_DEBUG @@ -539,6 +542,8 @@ buf_block_init( block->n_pointers = 0; + mutex_create(&block->mutex, SYNC_BUF_BLOCK); + rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); ut_ad(rw_lock_validate(&(block->lock))); @@ -813,8 +818,15 @@ buf_awe_map_page_to_frame( bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped); while (bck) { - if (bck->state == BUF_BLOCK_FILE_PAGE - && (bck->buf_fix_count != 0 || bck->io_fix != 0)) { + ibool skip; + + mutex_enter(&bck->mutex); + + skip = (bck->state == BUF_BLOCK_FILE_PAGE + && (bck->buf_fix_count != 0 || bck->io_fix != 0)); + + if (skip) { + mutex_exit(&bck->mutex); /* We have to skip this */ bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck); @@ -848,6 +860,8 @@ buf_awe_map_page_to_frame( buf_pool->n_pages_awe_remapped++; + mutex_exit(&bck->mutex); + return; } } @@ -886,13 +900,22 @@ buf_block_make_young( /*=================*/ buf_block_t* block) /* in: block to make younger */ { - if (buf_pool->freed_page_clock >= block->freed_page_clock - + 1 + (buf_pool->curr_size / 1024)) { +#ifdef UNIV_SYNC_DEBUG + ut_ad(!mutex_own(&(buf_pool->mutex))); +#endif /* UNIV_SYNC_DEBUG */ + /* Note that we read freed_page_clock's without holding any mutex: + this is allowed since the result is used only in heuristics */ + + if (buf_pool->freed_page_clock >= block->freed_page_clock + + 1 + (buf_pool->curr_size / 4)) { + + mutex_enter(&buf_pool->mutex); /* There has been freeing activity in the LRU list: best to move to the head of the LRU list */ buf_LRU_make_block_young(block); + mutex_exit(&buf_pool->mutex); } } @@ -927,12 +950,16 @@ buf_block_free( /*===========*/ buf_block_t* block) /* in, own: block to be freed */ { - ut_a(block->state != BUF_BLOCK_FILE_PAGE); - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + + ut_a(block->state != BUF_BLOCK_FILE_PAGE); + buf_LRU_block_free_non_file_page(block); + mutex_exit(&block->mutex); + mutex_exit(&(buf_pool->mutex)); } @@ -1151,9 +1178,8 @@ buf_page_get_gen( #endif buf_pool->n_page_gets++; loop: - mutex_enter_fast(&(buf_pool->mutex)); - block = NULL; + mutex_enter_fast(&(buf_pool->mutex)); if (guess) { block = buf_block_align(guess); @@ -1191,6 +1217,8 @@ loop: goto loop; } + mutex_enter(&block->mutex); + ut_a(block->state == BUF_BLOCK_FILE_PAGE); must_read = FALSE; @@ -1200,9 +1228,9 @@ loop: must_read = TRUE; if (mode == BUF_GET_IF_IN_POOL) { - /* The page is only being read to buffer */ - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&buf_pool->mutex); + mutex_exit(&block->mutex); return(NULL); } @@ -1226,7 +1254,7 @@ loop: #else buf_block_buf_fix_inc(block); #endif - buf_block_make_young(block); + mutex_exit(&buf_pool->mutex); /* Check if this is the first access to the page */ @@ -1234,10 +1262,13 @@ loop: block->accessed = TRUE; + mutex_exit(&block->mutex); + + buf_block_make_young(block); + #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(block->file_page_was_freed == FALSE); #endif - mutex_exit(&(buf_pool->mutex)); #ifdef UNIV_DEBUG buf_dbg_counter++; @@ -1262,13 +1293,14 @@ loop: } if (!success) { - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); block->buf_fix_count--; + + mutex_exit(&block->mutex); #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - mutex_exit(&(buf_pool->mutex)); return(NULL); } @@ -1279,18 +1311,16 @@ loop: completes */ for (;;) { - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); if (block->io_fix == BUF_IO_READ) { - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); - /* Sleep 20 milliseconds */ - - os_thread_sleep(20000); + os_thread_sleep(WAIT_FOR_READ); } else { - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); break; } @@ -1349,14 +1379,14 @@ buf_page_optimistic_get_func( ut_ad(mtr && block); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - mutex_enter(&(buf_pool->mutex)); - /* If AWE is used, block may have a different frame now, e.g., NULL */ + mutex_enter(&block->mutex); + if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE) || UNIV_UNLIKELY(block->frame != guess)) { -exit_func: - mutex_exit(&(buf_pool->mutex)); + + mutex_exit(&block->mutex); return(FALSE); } @@ -1366,16 +1396,15 @@ exit_func: #else buf_block_buf_fix_inc(block); #endif + accessed = block->accessed; + block->accessed = TRUE; + + mutex_exit(&block->mutex); + buf_block_make_young(block); /* Check if this is the first access to the page */ - accessed = block->accessed; - - block->accessed = TRUE; - - mutex_exit(&(buf_pool->mutex)); - ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); if (rw_latch == RW_S_LATCH) { @@ -1389,13 +1418,16 @@ exit_func: } if (UNIV_UNLIKELY(!success)) { - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); block->buf_fix_count--; + + mutex_exit(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - goto exit_func; + return(FALSE); } if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) { @@ -1408,13 +1440,16 @@ exit_func: rw_lock_x_unlock(&(block->lock)); } - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); block->buf_fix_count--; + + mutex_exit(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - goto exit_func; + return(FALSE); } mtr_memo_push(mtr, block, fix_type); @@ -1471,10 +1506,10 @@ buf_page_get_known_nowait( ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - mutex_enter(&(buf_pool->mutex)); - block = buf_block_align(guess); + mutex_enter(&block->mutex); + if (block->state == BUF_BLOCK_REMOVE_HASH) { /* Another thread is just freeing the block from the LRU list of the buffer pool: do not try to access this page; this @@ -1483,7 +1518,7 @@ buf_page_get_known_nowait( we have already removed it from the page address hash table of the buffer pool. */ - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); return(FALSE); } @@ -1495,12 +1530,12 @@ buf_page_get_known_nowait( #else buf_block_buf_fix_inc(block); #endif + mutex_exit(&block->mutex); + if (mode == BUF_MAKE_YOUNG) { buf_block_make_young(block); } - mutex_exit(&(buf_pool->mutex)); - ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { @@ -1514,13 +1549,15 @@ buf_page_get_known_nowait( } if (!success) { - mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); block->buf_fix_count--; + + mutex_exit(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - mutex_exit(&(buf_pool->mutex)); return(FALSE); } @@ -1568,7 +1605,6 @@ buf_page_init_for_backup_restore( block->offset = offset; block->lock_hash_val = 0; - block->lock_mutex = NULL; block->freed_page_clock = 0; @@ -1601,6 +1637,7 @@ buf_page_init( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&(block->mutex))); #endif /* UNIV_SYNC_DEBUG */ ut_a(block->state != BUF_BLOCK_FILE_PAGE); @@ -1615,7 +1652,6 @@ buf_page_init( block->index = NULL; block->lock_hash_val = lock_rec_hash(space, offset); - block->lock_mutex = NULL; /* Insert into the hash table of file pages */ @@ -1709,6 +1745,7 @@ buf_page_init_for_read( ut_a(block); mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); if (fil_tablespace_deleted_or_being_deleted_in_mem( space, tablespace_version)) { @@ -1722,7 +1759,9 @@ buf_page_init_for_read( deleted or is being deleted, or the page is already in buf_pool, return */ + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); + buf_block_free(block); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -1742,6 +1781,7 @@ buf_page_init_for_read( buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */ block->io_fix = BUF_IO_READ; + buf_pool->n_pend_reads++; /* We set a pass-type x-lock on the frame because then the same @@ -1753,6 +1793,7 @@ buf_page_init_for_read( rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ); + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -1817,6 +1858,8 @@ buf_page_create( block = free_block; + mutex_enter(&block->mutex); + buf_page_init(space, offset, block); /* The block must be put to the LRU list */ @@ -1827,13 +1870,15 @@ buf_page_create( #else buf_block_buf_fix_inc(block); #endif + buf_pool->n_pages_created++; + + mutex_exit(&(buf_pool->mutex)); + mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); block->accessed = TRUE; - buf_pool->n_pages_created++; - - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); /* Delete possible entries for the page from the insert buffer: such can exist if the page belonged to an index which was dropped */ @@ -1885,6 +1930,12 @@ buf_page_io_complete( ut_a(block->state == BUF_BLOCK_FILE_PAGE); + /* We do not need protect block->io_fix here by block->mutex to read + it because this is the only function where we can change the value + from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code + ensures that this is the only thread that handles the i/o for this + block. */ + io_type = block->io_fix; if (io_type == BUF_IO_READ) { @@ -1986,11 +2037,12 @@ buf_page_io_complete( } } + mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + #ifdef UNIV_IBUF_DEBUG ut_a(ibuf_count_get(block->space, block->offset) == 0); #endif - mutex_enter(&(buf_pool->mutex)); - /* Because this thread which does the unlocking is not the same that did the locking, we use a pass value != 0 in unlock, which simply removes the newest lock debug record, without checking the thread @@ -2033,6 +2085,7 @@ buf_page_io_complete( #endif /* UNIV_DEBUG */ } + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); #ifdef UNIV_DEBUG @@ -2095,6 +2148,8 @@ buf_validate(void) block = buf_pool_get_nth_block(buf_pool, i); + mutex_enter(&block->mutex); + if (block->state == BUF_BLOCK_FILE_PAGE) { ut_a(buf_page_hash_get(block->space, @@ -2139,6 +2194,8 @@ buf_validate(void) } else if (block->state == BUF_BLOCK_NOT_USED) { n_free++; } + + mutex_exit(&block->mutex); } if (n_lru + n_free > buf_pool->curr_size) { @@ -2286,9 +2343,14 @@ buf_get_latched_pages_number(void) block = buf_pool_get_nth_block(buf_pool, i); - if (((block->buf_fix_count != 0) || (block->io_fix != 0)) - && block->magic_n == BUF_BLOCK_MAGIC_N) { - fixed_pages_number++; + if (block->magic_n == BUF_BLOCK_MAGIC_N) { + mutex_enter(&block->mutex); + + if (block->buf_fix_count != 0 || block->io_fix != 0) { + fixed_pages_number++; + } + + mutex_exit(&block->mutex); } } @@ -2458,6 +2520,8 @@ buf_all_freed(void) block = buf_pool_get_nth_block(buf_pool, i); + mutex_enter(&block->mutex); + if (block->state == BUF_BLOCK_FILE_PAGE) { if (!buf_flush_ready_for_replace(block)) { @@ -2469,6 +2533,8 @@ buf_all_freed(void) ut_error; } } + + mutex_exit(&block->mutex); } mutex_exit(&(buf_pool->mutex)); diff --git a/buf/buf0flu.c b/buf/buf0flu.c index 8ef8c3b9358..49b81196a64 100644 --- a/buf/buf0flu.c +++ b/buf/buf0flu.c @@ -113,6 +113,7 @@ buf_flush_ready_for_replace( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ if (block->state != BUF_BLOCK_FILE_PAGE) { ut_print_timestamp(stderr); @@ -148,6 +149,7 @@ buf_flush_ready_for_flush( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&(block->mutex))); #endif /* UNIV_SYNC_DEBUG */ ut_a(block->state == BUF_BLOCK_FILE_PAGE); @@ -559,8 +561,15 @@ buf_flush_try_page( ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE); + if (!block) { + mutex_exit(&(buf_pool->mutex)); + return(0); + } + + mutex_enter(&block->mutex); + if (flush_type == BUF_FLUSH_LIST - && block && buf_flush_ready_for_flush(block, flush_type)) { + && buf_flush_ready_for_flush(block, flush_type)) { block->io_fix = BUF_IO_WRITE; @@ -598,6 +607,7 @@ buf_flush_try_page( locked = TRUE; } + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); if (!locked) { @@ -618,7 +628,7 @@ buf_flush_try_page( return(1); - } else if (flush_type == BUF_FLUSH_LRU && block + } else if (flush_type == BUF_FLUSH_LRU && buf_flush_ready_for_flush(block, flush_type)) { /* VERY IMPORTANT: @@ -659,13 +669,14 @@ buf_flush_try_page( buf_pool mutex: this ensures that the latch is acquired immediately. */ + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); buf_flush_write_block_low(block); return(1); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE && block + } else if (flush_type == BUF_FLUSH_SINGLE_PAGE && buf_flush_ready_for_flush(block, flush_type)) { block->io_fix = BUF_IO_WRITE; @@ -692,6 +703,7 @@ buf_flush_try_page( (buf_pool->n_flush[flush_type])++; + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); @@ -709,11 +721,12 @@ buf_flush_try_page( buf_flush_write_block_low(block); return(1); - } else { - mutex_exit(&(buf_pool->mutex)); - - return(0); } + + mutex_exit(&block->mutex); + mutex_exit(&(buf_pool->mutex)); + + return(0); } /*************************************************************** @@ -758,34 +771,48 @@ buf_flush_try_neighbors( block = buf_page_hash_get(space, i); ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE); - if (block && flush_type == BUF_FLUSH_LRU && i != offset - && !block->old) { + if (!block) { + + continue; + + } else if (flush_type == BUF_FLUSH_LRU && i != offset + && !block->old) { /* We avoid flushing 'non-old' blocks in an LRU flush, because the flushed blocks are soon freed */ continue; - } + } else { - if (block && buf_flush_ready_for_flush(block, flush_type) - && (i == offset || block->buf_fix_count == 0)) { - /* We only try to flush those neighbors != offset - where the buf fix count is zero, as we then know that - we probably can latch the page without a semaphore - wait. Semaphore waits are expensive because we must - flush the doublewrite buffer before we start - waiting. */ + mutex_enter(&block->mutex); - mutex_exit(&(buf_pool->mutex)); + if (buf_flush_ready_for_flush(block, flush_type) + && (i == offset || block->buf_fix_count == 0)) { + /* We only try to flush those + neighbors != offset where the buf fix count is + zero, as we then know that we probably can + latch the page without a semaphore wait. + Semaphore waits are expensive because we must + flush the doublewrite buffer before we start + waiting. */ - /* Note: as we release the buf_pool mutex above, in - buf_flush_try_page we cannot be sure the page is still - in a flushable state: therefore we check it again - inside that function. */ + mutex_exit(&block->mutex); - count += buf_flush_try_page(space, i, flush_type); + mutex_exit(&(buf_pool->mutex)); - mutex_enter(&(buf_pool->mutex)); + /* Note: as we release the buf_pool mutex + above, in buf_flush_try_page we cannot be sure + the page is still in a flushable state: + therefore we check it again inside that + function. */ + + count += buf_flush_try_page(space, i, + flush_type); + + mutex_enter(&(buf_pool->mutex)); + } else { + mutex_exit(&block->mutex); + } } } @@ -879,12 +906,15 @@ buf_flush_batch( while ((block != NULL) && !found) { ut_a(block->state == BUF_BLOCK_FILE_PAGE); + mutex_enter(&block->mutex); + if (buf_flush_ready_for_flush(block, flush_type)) { found = TRUE; space = block->space; offset = block->offset; + mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); old_page_count = page_count; @@ -901,10 +931,14 @@ buf_flush_batch( } else if (flush_type == BUF_FLUSH_LRU) { + mutex_exit(&block->mutex); + block = UT_LIST_GET_PREV(LRU, block); } else { ut_ad(flush_type == BUF_FLUSH_LIST); + mutex_exit(&block->mutex); + block = UT_LIST_GET_PREV(flush_list, block); } } @@ -986,10 +1020,14 @@ buf_flush_LRU_recommendation(void) + BUF_FLUSH_EXTRA_MARGIN) && (distance < BUF_LRU_FREE_SEARCH_LEN)) { + mutex_enter(&block->mutex); + if (buf_flush_ready_for_replace(block)) { n_replaceable++; } + mutex_exit(&block->mutex); + distance++; block = UT_LIST_GET_PREV(LRU, block); diff --git a/buf/buf0lru.c b/buf/buf0lru.c index 4ebe94c40ec..25b15899cf0 100644 --- a/buf/buf0lru.c +++ b/buf/buf0lru.c @@ -86,6 +86,9 @@ scan_again: block = UT_LIST_GET_LAST(buf_pool->LRU); while (block != NULL) { + + mutex_enter(&block->mutex); + ut_a(block->state == BUF_BLOCK_FILE_PAGE); if (block->space == id @@ -112,6 +115,8 @@ scan_again: if (block->is_hashed) { page_no = block->offset; + mutex_exit(&block->mutex); + mutex_exit(&(buf_pool->mutex)); /* Note that the following call will acquire @@ -138,6 +143,7 @@ scan_again: buf_LRU_block_free_hashed_page(block); } next_page: + mutex_exit(&block->mutex); block = UT_LIST_GET_PREV(LRU, block); } @@ -211,6 +217,9 @@ buf_LRU_search_and_free_block( while (block != NULL) { ut_a(block->in_LRU_list); + + mutex_enter(&block->mutex); + if (buf_flush_ready_for_replace(block)) { #ifdef UNIV_DEBUG @@ -226,6 +235,7 @@ buf_LRU_search_and_free_block( buf_LRU_block_remove_hashed_page(block); mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); /* Remove possible adaptive hash index built on the page; in the case of AWE the block may not have a @@ -234,15 +244,21 @@ buf_LRU_search_and_free_block( if (block->frame) { btr_search_drop_page_hash_index(block->frame); } - mutex_enter(&(buf_pool->mutex)); ut_a(block->buf_fix_count == 0); + mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + buf_LRU_block_free_hashed_page(block); freed = TRUE; + mutex_exit(&block->mutex); break; } + + mutex_exit(&block->mutex); + block = UT_LIST_GET_PREV(LRU, block); distance++; @@ -428,8 +444,12 @@ loop: } } + mutex_enter(&block->mutex); + block->state = BUF_BLOCK_READY_FOR_USE; + mutex_exit(&block->mutex); + mutex_exit(&(buf_pool->mutex)); if (started_monitor) { @@ -838,6 +858,7 @@ buf_LRU_block_free_non_file_page( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block); @@ -877,6 +898,7 @@ buf_LRU_block_remove_hashed_page( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(block); @@ -939,6 +961,7 @@ buf_LRU_block_free_hashed_page( { #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(mutex_own(&block->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_a(block->state == BUF_BLOCK_REMOVE_HASH); diff --git a/include/buf0buf.h b/include/buf0buf.h index 2fc6bf4bcb9..979c28f64ed 100644 --- a/include/buf0buf.h +++ b/include/buf0buf.h @@ -461,8 +461,8 @@ Gets the mutex number protecting the page record lock hash chain in the lock table. */ UNIV_INLINE mutex_t* -buf_frame_get_lock_mutex( -/*=====================*/ +buf_frame_get_mutex( +/*================*/ /* out: mutex */ byte* ptr); /* in: pointer to within a buffer frame */ /*********************************************************************** @@ -713,7 +713,10 @@ struct buf_block_struct{ ulint magic_n; /* magic number to check */ ulint state; /* state of the control block: - BUF_BLOCK_NOT_USED, ... */ + BUF_BLOCK_NOT_USED, ...; changing + this is only allowed when a thread + has BOTH the buffer pool mutex AND + block->mutex locked */ byte* frame; /* pointer to buffer frame which is of size UNIV_PAGE_SIZE, and aligned to an address divisible by @@ -731,8 +734,12 @@ struct buf_block_struct{ ulint offset; /* page number within the space */ ulint lock_hash_val; /* hashed value of the page address in the record lock hash table */ - mutex_t* lock_mutex; /* mutex protecting the chain in the - record lock hash table */ + mutex_t mutex; /* mutex protecting this block: + state (also protected by the buffer + pool mutex), io_fix, buf_fix_count, + and accessed; we introduce this new + mutex in InnoDB-5.1 to relieve + contention on the buffer pool mutex */ rw_lock_t lock; /* read-write lock of the buffer frame */ buf_block_t* hash; /* node used in chaining to the page @@ -788,20 +795,27 @@ struct buf_block_struct{ in heuristic algorithms, because of the possibility of a wrap-around! */ ulint freed_page_clock;/* the value of freed_page_clock - buffer pool when this block was - last time put to the head of the - LRU list */ + of the buffer pool when this block was + the last time put to the head of the + LRU list; a thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ ibool old; /* TRUE if the block is in the old blocks in the LRU list */ ibool accessed; /* TRUE if the page has been accessed while in the buffer pool: read-ahead may read in pages which have not been - accessed yet */ + accessed yet; this is protected by + block->mutex; a thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ ulint buf_fix_count; /* count of how manyfold this block - is currently bufferfixed */ + is currently bufferfixed; this is + protected by block->mutex */ ulint io_fix; /* if a read is pending to the frame, io_fix is BUF_IO_READ, in the case - of a write BUF_IO_WRITE, otherwise 0 */ + of a write BUF_IO_WRITE, otherwise 0; + this is protected by block->mutex */ /* 4. Optimistic search field */ dulint modify_clock; /* this clock is incremented every @@ -959,7 +973,9 @@ struct buf_pool_struct{ number of buffer blocks removed from the end of the LRU list; NOTE that this counter may wrap around at 4 - billion! */ + billion! A thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ ulint LRU_flush_ended;/* when an LRU flush ends for a page, this is incremented by one; this is set to zero when a buffer block is diff --git a/include/buf0buf.ic b/include/buf0buf.ic index 015de862705..43895295734 100644 --- a/include/buf0buf.ic +++ b/include/buf0buf.ic @@ -337,8 +337,8 @@ Gets the mutex number protecting the page record lock hash chain in the lock table. */ UNIV_INLINE mutex_t* -buf_frame_get_lock_mutex( -/*=====================*/ +buf_frame_get_mutex( +/*================*/ /* out: mutex */ byte* ptr) /* in: pointer to within a buffer frame */ { @@ -346,7 +346,7 @@ buf_frame_get_lock_mutex( block = buf_block_align(ptr); - return(block->lock_mutex); + return(&block->mutex); } /************************************************************************* @@ -519,6 +519,7 @@ buf_block_buf_fix_inc_debug( ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); ut_ad(ret == TRUE); + ut_ad(mutex_own(&block->mutex)); #endif block->buf_fix_count++; } @@ -531,6 +532,9 @@ buf_block_buf_fix_inc( /*==================*/ buf_block_t* block) /* in: block to bufferfix */ { +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&block->mutex)); +#endif block->buf_fix_count++; } #endif /* UNIV_SYNC_DEBUG */ @@ -625,23 +629,24 @@ buf_page_release( ut_ad(block); - mutex_enter_fast(&(buf_pool->mutex)); - ut_a(block->state == BUF_BLOCK_FILE_PAGE); ut_a(block->buf_fix_count > 0); if (rw_latch == RW_X_LATCH && mtr->modifications) { - + mutex_enter(&buf_pool->mutex); buf_flush_note_modification(block, mtr); + mutex_exit(&buf_pool->mutex); } + mutex_enter(&block->mutex); + #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif buf_fix_count = block->buf_fix_count; block->buf_fix_count = buf_fix_count - 1; - mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); if (rw_latch == RW_S_LATCH) { rw_lock_s_unlock(&(block->lock));