diff --git a/mysql-test/suite/innodb/r/alter_copy.result b/mysql-test/suite/innodb/r/alter_copy.result index 8c9e5966b2e..72ae28e9652 100644 --- a/mysql-test/suite/innodb/r/alter_copy.result +++ b/mysql-test/suite/innodb/r/alter_copy.result @@ -51,7 +51,7 @@ ADD INDEX(a,b,d), ADD INDEX(a,d,b), ADD INDEX(b,c,d), ADD INDEX(b,d,c), ALGORITHM=COPY; connection default; SET DEBUG_SYNC='now WAIT_FOR hung'; -# restart: --innodb-force-recovery=3 --debug_dbug=+d,recv_ran_out_of_buffer +# restart: --innodb-force-recovery=3 disconnect hang; FTS_INDEX_1.ibd FTS_INDEX_2.ibd diff --git a/mysql-test/suite/innodb/r/innodb-lru-force-no-free-page.result b/mysql-test/suite/innodb/r/innodb-lru-force-no-free-page.result deleted file mode 100644 index ac3010a101a..00000000000 --- a/mysql-test/suite/innodb/r/innodb-lru-force-no-free-page.result +++ /dev/null @@ -1,9 +0,0 @@ -SET @saved_debug = @@SESSION.debug_dbug; -SET SESSION debug_dbug="+d,ib_lru_force_no_free_page"; -CREATE TABLE t1 (j LONGBLOB) ENGINE = InnoDB; -BEGIN; -INSERT INTO t1 VALUES (repeat('abcdefghijklmnopqrstuvwxyz',200)); -COMMIT; -SET debug_dbug = @saved_debug; -DROP TABLE t1; -FOUND 1 /InnoDB: Difficult to find free blocks / in mysqld.1.err diff --git a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result index 6c10b4bad5f..a8b30bee31d 100644 --- a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result +++ b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result @@ -88,7 +88,6 @@ buffer_flush_n_to_flush_by_age buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NUL buffer_flush_adaptive_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently. buffer_flush_adaptive_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of adaptive flushes passed during the recent Avg period. buffer_LRU_get_free_loops buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total loops in LRU get free. -buffer_LRU_get_free_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total sleep waits in LRU get free. buffer_flush_avg_page_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Average number of pages at which flushing is happening buffer_flush_lsn_avg_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Average redo generation rate buffer_flush_pct_for_dirty buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Percent of IO capacity used to avoid max dirty page limit @@ -108,7 +107,6 @@ buffer_LRU_batch_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NUL buffer_LRU_batch_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Pages scanned per LRU batch call buffer_LRU_batch_flush_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Total pages flushed as part of LRU batches buffer_LRU_batch_evict_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Total pages evicted as part of LRU batches -buffer_LRU_single_flush_failure_count Buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of times attempt to flush a single page from LRU failed buffer_LRU_get_free_search Buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of searches performed for a clean page buffer_LRU_search_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_owner Total pages scanned as part of LRU search buffer_LRU_search_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Number of times LRU search is performed diff --git a/mysql-test/suite/innodb/r/monitor.result b/mysql-test/suite/innodb/r/monitor.result index c45462f418c..3d500609524 100644 --- a/mysql-test/suite/innodb/r/monitor.result +++ b/mysql-test/suite/innodb/r/monitor.result @@ -53,7 +53,6 @@ buffer_flush_n_to_flush_by_age disabled buffer_flush_adaptive_avg_time disabled buffer_flush_adaptive_avg_pass disabled buffer_LRU_get_free_loops disabled -buffer_LRU_get_free_waits disabled buffer_flush_avg_page_rate disabled buffer_flush_lsn_avg_rate disabled buffer_flush_pct_for_dirty disabled @@ -73,7 +72,6 @@ buffer_LRU_batch_num_scan disabled buffer_LRU_batch_scanned_per_call disabled buffer_LRU_batch_flush_total_pages enabled buffer_LRU_batch_evict_total_pages enabled -buffer_LRU_single_flush_failure_count disabled buffer_LRU_get_free_search disabled buffer_LRU_search_scanned disabled buffer_LRU_search_num_scan disabled diff --git a/mysql-test/suite/innodb/t/alter_copy.test b/mysql-test/suite/innodb/t/alter_copy.test index b62f812f4b7..90f2171d10b 100644 --- a/mysql-test/suite/innodb/t/alter_copy.test +++ b/mysql-test/suite/innodb/t/alter_copy.test @@ -57,7 +57,7 @@ ALTER TABLE t ADD INDEX(b,c,d,a),ADD INDEX(b,c,a,d),ADD INDEX(b,a,c,d),ADD INDEX connection default; SET DEBUG_SYNC='now WAIT_FOR hung'; let $shutdown_timeout=0; ---let $restart_parameters= --innodb-force-recovery=3 --debug_dbug="+d,recv_ran_out_of_buffer" +--let $restart_parameters= --innodb-force-recovery=3 --source include/restart_mysqld.inc disconnect hang; let $shutdown_timeout=; diff --git a/mysql-test/suite/innodb/t/innodb-lru-force-no-free-page.test b/mysql-test/suite/innodb/t/innodb-lru-force-no-free-page.test deleted file mode 100644 index bbf0e1757c3..00000000000 --- a/mysql-test/suite/innodb/t/innodb-lru-force-no-free-page.test +++ /dev/null @@ -1,22 +0,0 @@ ---source include/have_innodb.inc ---source include/have_debug.inc ---source include/not_embedded.inc - -SET @saved_debug = @@SESSION.debug_dbug; -SET SESSION debug_dbug="+d,ib_lru_force_no_free_page"; - -CREATE TABLE t1 (j LONGBLOB) ENGINE = InnoDB; -BEGIN; -INSERT INTO t1 VALUES (repeat('abcdefghijklmnopqrstuvwxyz',200)); -COMMIT; - -SET debug_dbug = @saved_debug; - -DROP TABLE t1; - -# -# There should be only one message -# -let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; ---let SEARCH_PATTERN=InnoDB: Difficult to find free blocks ---source include/search_pattern_in_file.inc diff --git a/mysql-test/suite/innodb/t/purge_secondary.test b/mysql-test/suite/innodb/t/purge_secondary.test index 8a38a418877..ec02c726891 100644 --- a/mysql-test/suite/innodb/t/purge_secondary.test +++ b/mysql-test/suite/innodb/t/purge_secondary.test @@ -4,10 +4,6 @@ SET @save_stats_persistent = @@GLOBAL.innodb_stats_persistent; SET GLOBAL innodb_stats_persistent = 0; ---disable_query_log -call mtr.add_suppression("InnoDB: Difficult to find free blocks in the buffer pool"); ---enable_query_log - CREATE TABLE t1 ( a SERIAL, b CHAR(255) NOT NULL DEFAULT '', c BOOLEAN DEFAULT false, l LINESTRING NOT NULL DEFAULT ST_linefromtext('linestring(448 -689, diff --git a/mysql-test/suite/innodb_zip/t/innochecksum_2.test b/mysql-test/suite/innodb_zip/t/innochecksum_2.test index 62e792c1ce4..57926d921f4 100644 --- a/mysql-test/suite/innodb_zip/t/innochecksum_2.test +++ b/mysql-test/suite/innodb_zip/t/innochecksum_2.test @@ -9,11 +9,6 @@ --source include/not_embedded.inc -- source include/big_test.inc ---disable_query_log -# This warning occurs due to small buffer pool size(i.e. 8MB). It doesn't occur -# with --mysqld=--innodb_buffer_pool_size=10MB -call mtr.add_suppression("\\[Warning\\] InnoDB: Difficult to find free blocks in the buffer pool.*"); ---enable_query_log let MYSQLD_BASEDIR= `SELECT @@basedir`; let MYSQLD_DATADIR= `SELECT @@datadir`; let SEARCH_FILE= $MYSQLTEST_VARDIR/log/my_restart.err; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 6abbda1f45c..90f47f1fbf0 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1397,17 +1397,14 @@ inline bool buf_pool_t::withdraw_blocks() /* reserve free_list length */ if (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - buf_flush_LRU( - std::max(withdraw_target - - UT_LIST_GET_LEN(withdraw), - srv_LRU_scan_depth), - true); - mysql_mutex_unlock(&buf_pool.mutex); - buf_dblwr.flush_buffered_writes(); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_flush_wait_LRU_batch_end(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - mysql_mutex_lock(&buf_pool.mutex); + try_LRU_scan = false; + mysql_mutex_unlock(&mutex); + mysql_mutex_lock(&flush_list_mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, + &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); + mysql_mutex_lock(&mutex); } /* relocate blocks/buddies in withdrawn area */ @@ -2003,7 +2000,10 @@ buf_page_t *buf_pool_t::watch_set(const page_id_t id, got_block: bpage->fix(); if (watch_is_sentinel(*bpage)) + { + ut_ad(!bpage->oldest_modification()); bpage= nullptr; + } page_hash.lock_get(chain).unlock(); return bpage; } @@ -2075,6 +2075,7 @@ void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) } else { + ut_ad(!w->oldest_modification()); const auto state= w->state(); ut_ad(~buf_page_t::LRU_MASK & state); ut_ad(state >= buf_page_t::UNFIXED + 1); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 696a09f60d4..d7e4e19c672 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -311,30 +311,22 @@ buf_flush_relocate_on_flush_list( ut_d(buf_flush_validate_low()); } -/** Note that a block is no longer dirty, while not removing -it from buf_pool.flush_list -@param temporary whether the page belongs to the temporary tablespace -@param error whether an error may have occurred while writing */ -inline void buf_page_t::write_complete(bool temporary, bool error) +void buf_page_t::write_complete(bool persistent, bool error, uint32_t state) { - ut_ad(temporary == fsp_is_system_temporary(id().space())); - if (UNIV_UNLIKELY(error)); - else if (temporary) - { - ut_ad(oldest_modification() == 2); - oldest_modification_= 0; - } - else + ut_ad(!persistent == fsp_is_system_temporary(id().space())); + ut_ad(state >= WRITE_FIX); + + if (UNIV_LIKELY(!error)) { + ut_d(lsn_t om= oldest_modification()); + ut_ad(om >= 2); + ut_ad(persistent == (om > 2)); /* We use release memory order to guarantee that callers of oldest_modification_acquire() will observe the block as being detached from buf_pool.flush_list, after reading the value 0. */ - ut_ad(oldest_modification() > 2); - oldest_modification_.store(1, std::memory_order_release); + oldest_modification_.store(persistent, std::memory_order_release); } - const auto s= state(); - ut_ad(s >= WRITE_FIX); - zip.fix.fetch_sub((s >= WRITE_FIX_REINIT) + zip.fix.fetch_sub((state >= WRITE_FIX_REINIT) ? (WRITE_FIX_REINIT - UNFIXED) : (WRITE_FIX - UNFIXED)); lock.u_unlock(true); @@ -348,18 +340,10 @@ inline void buf_pool_t::n_flush_inc() inline void buf_pool_t::n_flush_dec() { - mysql_mutex_lock(&flush_list_mutex); + mysql_mutex_assert_owner(&flush_list_mutex); ut_ad(page_cleaner_status >= LRU_FLUSH); if ((page_cleaner_status-= LRU_FLUSH) < LRU_FLUSH) pthread_cond_broadcast(&done_flush_LRU); - mysql_mutex_unlock(&flush_list_mutex); -} - -inline void buf_pool_t::n_flush_dec_holding_mutex() -{ - mysql_mutex_assert_owner(&flush_list_mutex); - ut_ad(page_cleaner_status >= LRU_FLUSH); - page_cleaner_status-= LRU_FLUSH; } /** Complete write of a file page from buf_pool. @@ -389,28 +373,26 @@ void buf_page_write_complete(const IORequest &request, bool error) mysql_mutex_assert_not_owner(&buf_pool.mutex); mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); - if (request.is_LRU()) + const bool persistent= bpage->oldest_modification() != 2; + + if (UNIV_UNLIKELY(!persistent) && UNIV_LIKELY(!error)) { - const bool temp= bpage->oldest_modification() == 2; - if (!temp && state < buf_page_t::WRITE_FIX_REINIT && - request.node->space->use_doublewrite()) - buf_dblwr.write_completed(); /* We must hold buf_pool.mutex while releasing the block, so that no other thread can access it before we have freed it. */ mysql_mutex_lock(&buf_pool.mutex); - bpage->write_complete(temp, error); - if (!error) - buf_LRU_free_page(bpage, true); + bpage->write_complete(persistent, error, state); + buf_LRU_free_page(bpage, true); mysql_mutex_unlock(&buf_pool.mutex); - - buf_pool.n_flush_dec(); } else { + bpage->write_complete(persistent, error, state); if (state < buf_page_t::WRITE_FIX_REINIT && request.node->space->use_doublewrite()) + { + ut_ad(persistent); buf_dblwr.write_completed(); - bpage->write_complete(false, error); + } } } @@ -778,17 +760,15 @@ ATTRIBUTE_COLD void buf_pool_t::release_freed_page(buf_page_t *bpage) } /** Write a flushable page to a file or free a freeable block. -@param evict whether to evict the page on write completion @param space tablespace @return whether a page write was initiated and buf_pool.mutex released */ -bool buf_page_t::flush(bool evict, fil_space_t *space) +bool buf_page_t::flush(fil_space_t *space) { mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); ut_ad(in_file()); ut_ad(in_LRU_list); ut_ad((space->purpose == FIL_TYPE_TEMPORARY) == (space == fil_system.temp_space)); - ut_ad(evict || space != fil_system.temp_space); ut_ad(space->referenced()); const auto s= state(); @@ -835,22 +815,11 @@ bool buf_page_t::flush(bool evict, fil_space_t *space) mysql_mutex_unlock(&buf_pool.mutex); IORequest::Type type= IORequest::WRITE_ASYNC; - if (UNIV_UNLIKELY(evict)) - { - type= IORequest::WRITE_LRU; - mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_pool.n_flush_inc(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - } /* Apart from the U-lock, this block will also be protected by is_write_fixed() and oldest_modification()>1. Thus, it cannot be relocated or removed. */ - DBUG_PRINT("ib_buf", ("%s %u page %u:%u", - evict ? "LRU" : "flush_list", - id().space(), id().page_no())); - buf_block_t *block= reinterpret_cast(this); page_t *write_frame= zip.data; @@ -902,10 +871,7 @@ bool buf_page_t::flush(bool evict, fil_space_t *space) { switch (space->chain.start->punch_hole) { case 1: - static_assert(IORequest::PUNCH_LRU - IORequest::PUNCH == - IORequest::WRITE_LRU - IORequest::WRITE_ASYNC, ""); - type= - IORequest::Type(type + (IORequest::PUNCH - IORequest::WRITE_ASYNC)); + type= IORequest::PUNCH; break; case 2: size= orig_size; @@ -932,10 +898,8 @@ bool buf_page_t::flush(bool evict, fil_space_t *space) /** Check whether a page can be flushed from the buf_pool. @param id page identifier @param fold id.fold() -@param evict true=buf_pool.LRU; false=buf_pool.flush_list @return whether the page can be flushed */ -static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, - bool evict) +static bool buf_flush_check_neighbor(const page_id_t id, ulint fold) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(fold == id.fold()); @@ -944,26 +908,16 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, const buf_page_t *bpage= buf_pool.page_hash.get(id, buf_pool.page_hash.cell_get(fold)); - if (!bpage || buf_pool.watch_is_sentinel(*bpage)) - return false; - - /* We avoid flushing 'non-old' blocks in an eviction flush, because the - flushed blocks are soon freed */ - if (evict && !bpage->is_old()) - return false; - - return bpage->oldest_modification() > 1 && !bpage->is_io_fixed(); + return bpage && bpage->oldest_modification() > 1 && !bpage->is_io_fixed(); } /** Check which neighbors of a page can be flushed from the buf_pool. @param space tablespace @param id page identifier of a dirty page @param contiguous whether to consider contiguous areas of pages -@param evict true=buf_pool.LRU; false=buf_pool.flush_list @return last page number that can be flushed */ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, - page_id_t &id, bool contiguous, - bool evict) + page_id_t &id, bool contiguous) { ut_ad(id.page_no() < space.size + (space.physical_size() == 2048 ? 1 @@ -996,7 +950,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, for (page_id_t i= id - 1;; --i) { fold--; - if (!buf_flush_check_neighbor(i, fold, evict)) + if (!buf_flush_check_neighbor(i, fold)) { low= i + 1; break; @@ -1012,7 +966,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, while (++i < high) { ++fold; - if (!buf_flush_check_neighbor(i, fold, evict)) + if (!buf_flush_check_neighbor(i, fold)) break; } @@ -1089,14 +1043,13 @@ and also write zeroes or punch the hole for the freed ranges of pages. @param page_id page identifier @param bpage buffer page @param contiguous whether to consider contiguous areas of pages -@param evict true=buf_pool.LRU; false=buf_pool.flush_list @param n_flushed number of pages flushed so far in this batch @param n_to_flush maximum number of pages we are allowed to flush @return number of pages flushed */ static ulint buf_flush_try_neighbors(fil_space_t *space, const page_id_t page_id, buf_page_t *bpage, - bool contiguous, bool evict, + bool contiguous, ulint n_flushed, ulint n_to_flush) { ut_ad(space->id == page_id.space()); @@ -1110,7 +1063,7 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, ut_ad(lsn >= bpage->oldest_modification()); if (UNIV_UNLIKELY(lsn < space->get_create_lsn())) { - ut_a(!bpage->flush(evict, space)); + ut_a(!bpage->flush(space)); mysql_mutex_unlock(&buf_pool.mutex); return 0; } @@ -1120,7 +1073,7 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, ulint count= 0; page_id_t id= page_id; - page_id_t high= buf_flush_check_neighbors(*space, id, contiguous, evict); + page_id_t high= buf_flush_check_neighbors(*space, id, contiguous); ut_ad(page_id >= id); ut_ad(page_id < high); @@ -1157,7 +1110,7 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, ut_ad(!buf_pool.watch_is_sentinel(*b)); ut_ad(b->oldest_modification() > 1); flush: - if (b->flush(evict, space)) + if (b->flush(space)) { ++count; continue; @@ -1165,9 +1118,10 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, } /* We avoid flushing 'non-old' blocks in an eviction flush, because the flushed blocks are soon freed */ - else if ((!evict || b->is_old()) && !buf_pool.watch_is_sentinel(*b) && - b->oldest_modification() > 1 && b->lock.u_lock_try(true)) + else if (b->oldest_modification() > 1 && b->lock.u_lock_try(true)) { + /* For the buf_pool.watch[] sentinels, oldest_modification() == 0 */ + ut_ad(!buf_pool.watch_is_sentinel(*b)); if (b->oldest_modification() < 2) b->lock.u_unlock(true); else @@ -1289,10 +1243,8 @@ static void buf_flush_discard_page(buf_page_t *bpage) /** Flush dirty blocks from the end buf_pool.LRU, and move clean blocks to buf_pool.free. @param max maximum number of blocks to flush -@param evict whether dirty pages are to be evicted after flushing them @param n counts of flushed and evicted pages */ -static void buf_flush_LRU_list_batch(ulint max, bool evict, - flush_counters_t *n) +static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) { ulint scanned= 0; ulint free_limit= srv_LRU_scan_depth; @@ -1340,8 +1292,12 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict, if (state < buf_page_t::READ_FIX && bpage->lock.u_lock_try(true)) { ut_ad(!bpage->is_io_fixed()); - bool do_evict= evict; switch (bpage->oldest_modification()) { + case 2: + /* LRU flushing will always evict pages of the temporary tablespace, + in buf_page_write_complete(). */ + ++n->evicted; + break; case 1: mysql_mutex_lock(&buf_pool.flush_list_mutex); if (ut_d(lsn_t lsn=) bpage->oldest_modification()) @@ -1354,12 +1310,8 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict, case 0: bpage->lock.u_unlock(true); goto evict; - case 2: - /* LRU flushing will always evict pages of the temporary tablespace. */ - do_evict= true; } - /* Block is ready for flush. Dispatch an IO request. - If do_evict, the page may be evicted by buf_page_write_complete(). */ + /* Block is ready for flush. Dispatch an IO request. */ const page_id_t page_id(bpage->id()); const uint32_t space_id= page_id.space(); if (!space || space->id != space_id) @@ -1394,6 +1346,7 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict, no_space: mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_flush_discard_page(bpage); + ++n->evicted; continue; } @@ -1406,8 +1359,8 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict, if (neighbors && space->is_rotational()) n->flushed+= buf_flush_try_neighbors(space, page_id, bpage, neighbors == 1, - do_evict, n->flushed, max); - else if (bpage->flush(do_evict, space)) + n->flushed, max); + else if (bpage->flush(space)) ++n->flushed; else continue; @@ -1434,15 +1387,14 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict, /** Flush and move pages from LRU or unzip_LRU list to the free list. Whether LRU or unzip_LRU is used depends on the state of the system. @param max maximum number of blocks to flush -@param evict whether dirty pages are to be evicted after flushing them @param n counts of flushed and evicted pages */ -static void buf_do_LRU_batch(ulint max, bool evict, flush_counters_t *n) +static void buf_do_LRU_batch(ulint max, flush_counters_t *n) { if (buf_LRU_evict_from_unzip_LRU()) buf_free_from_unzip_LRU_list_batch(); n->evicted= 0; n->flushed= 0; - buf_flush_LRU_list_batch(max, evict, n); + buf_flush_LRU_list_batch(max, n); mysql_mutex_assert_owner(&buf_pool.mutex); buf_lru_freed_page_count+= n->evicted; @@ -1554,8 +1506,8 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) { if (neighbors && space->is_rotational()) count+= buf_flush_try_neighbors(space, page_id, bpage, - neighbors == 1, false, count, max_n); - else if (bpage->flush(false, space)) + neighbors == 1, count, max_n); + else if (bpage->flush(space)) ++count; else continue; @@ -1721,7 +1673,7 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) goto was_freed; } mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (bpage->flush(false, space)) + if (bpage->flush(space)) { ++n_flush; if (!--max_n_flush) @@ -1779,27 +1731,22 @@ and move clean blocks to buf_pool.free. The caller must invoke buf_dblwr.flush_buffered_writes() after releasing buf_pool.mutex. @param max_n wished maximum mumber of blocks flushed -@param evict whether to evict pages after flushing -@return evict ? number of processed pages : number of pages written */ -ulint buf_flush_LRU(ulint max_n, bool evict) +@return number of pages written */ +static ulint buf_flush_LRU(ulint max_n) { mysql_mutex_assert_owner(&buf_pool.mutex); flush_counters_t n; - buf_do_LRU_batch(max_n, evict, &n); + buf_do_LRU_batch(max_n, &n); ulint pages= n.flushed; if (n.evicted) { - if (evict) - pages+= n.evicted; buf_pool.try_LRU_scan= true; pthread_cond_broadcast(&buf_pool.done_free); } - else if (!pages && !buf_pool.try_LRU_scan && - !buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) - { + else if (!pages && !buf_pool.try_LRU_scan) /* For example, with the minimum innodb_buffer_pool_size=5M and the default innodb_page_size=16k there are only a little over 316 pages in the buffer pool. The buffer pool can easily be exhausted @@ -1813,12 +1760,7 @@ ulint buf_flush_LRU(ulint max_n, bool evict) (3) This thread is the only one that could make progress, but we fail to do so because all the pages that we scanned are buffer-fixed or latched by some thread. */ - sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" - " %zu blocks are in use and %zu free." - " Consider increasing innodb_buffer_pool_size.", - UT_LIST_GET_LEN(buf_pool.LRU), - UT_LIST_GET_LEN(buf_pool.free)); - } + buf_pool.LRU_warn(); return pages; } @@ -2437,12 +2379,12 @@ static void buf_flush_page_cleaner() n= srv_max_io_capacity; mysql_mutex_lock(&buf_pool.mutex); LRU_flush: - n= buf_flush_LRU(n, false); + n= buf_flush_LRU(n); mysql_mutex_unlock(&buf_pool.mutex); last_pages+= n; check_oldest_and_set_idle: mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_pool.n_flush_dec_holding_mutex(); + buf_pool.n_flush_dec(); oldest_lsn= buf_pool.get_oldest_modification(0); if (!oldest_lsn) goto fully_unemployed; @@ -2575,6 +2517,16 @@ static void buf_flush_page_cleaner() #endif } +ATTRIBUTE_COLD void buf_pool_t::LRU_warn() +{ + mysql_mutex_assert_owner(&mutex); + if (!LRU_warned.test_and_set(std::memory_order_acquire)) + sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" + " %zu blocks are in use and %zu free." + " Consider increasing innodb_buffer_pool_size.", + UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free)); +} + /** Initialize page_cleaner. */ ATTRIBUTE_COLD void buf_flush_page_cleaner_init() { diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index ce37209f154..3fa0cf9002e 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -385,144 +385,76 @@ we put it to free list to be used. @return the free control block, in state BUF_BLOCK_MEMORY */ buf_block_t *buf_LRU_get_free_block(bool have_mutex) { - ulint n_iterations = 0; - ulint flush_failures = 0; - MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); - if (have_mutex) { - mysql_mutex_assert_owner(&buf_pool.mutex); - goto got_mutex; - } - DBUG_EXECUTE_IF("recv_ran_out_of_buffer", - if (recv_recovery_is_on() - && recv_sys.apply_log_recs) { - mysql_mutex_lock(&buf_pool.mutex); - goto flush_lru; - }); -get_mutex: - mysql_mutex_lock(&buf_pool.mutex); -got_mutex: - buf_LRU_check_size_of_non_data_objects(); - buf_block_t* block; + bool waited= false; + MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); + if (!have_mutex) + mysql_mutex_lock(&buf_pool.mutex); - IF_DBUG(static bool buf_lru_free_blocks_error_printed,); - DBUG_EXECUTE_IF("ib_lru_force_no_free_page", - if (!buf_lru_free_blocks_error_printed) { - n_iterations = 21; - goto not_found;}); + buf_LRU_check_size_of_non_data_objects(); + + buf_block_t *block; retry: - /* If there is a block in the free list, take it */ - if ((block = buf_LRU_get_free_only()) != nullptr) { + /* If there is a block in the free list, take it */ + block= buf_LRU_get_free_only(); + if (block) + { got_block: - const ulint LRU_size = UT_LIST_GET_LEN(buf_pool.LRU); - const ulint available = UT_LIST_GET_LEN(buf_pool.free); - const ulint scan_depth = srv_LRU_scan_depth / 2; - ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth - || buf_pool.need_LRU_eviction()); + const ulint LRU_size= UT_LIST_GET_LEN(buf_pool.LRU); + const ulint available= UT_LIST_GET_LEN(buf_pool.free); + const ulint scan_depth= srv_LRU_scan_depth / 2; + ut_ad(LRU_size <= BUF_LRU_MIN_LEN || + available >= scan_depth || buf_pool.need_LRU_eviction()); - if (!have_mutex) { - mysql_mutex_unlock(&buf_pool.mutex); - } + if (UNIV_UNLIKELY(available < scan_depth) && LRU_size > BUF_LRU_MIN_LEN) + { + mysql_mutex_lock(&buf_pool.flush_list_mutex); + if (!buf_pool.page_cleaner_active()) + buf_pool.page_cleaner_wakeup(true); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + } - if (UNIV_UNLIKELY(available < scan_depth) - && LRU_size > BUF_LRU_MIN_LEN) { - mysql_mutex_lock(&buf_pool.flush_list_mutex); - if (!buf_pool.page_cleaner_active()) { - buf_pool.page_cleaner_wakeup(true); - } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - } + if (!have_mutex) + mysql_mutex_unlock(&buf_pool.mutex); - block->page.zip.clear(); - return block; - } + block->page.zip.clear(); + return block; + } - MONITOR_INC( MONITOR_LRU_GET_FREE_LOOPS ); - if (n_iterations || buf_pool.try_LRU_scan) { - /* If no block was in the free list, search from the - end of the LRU list and try to free a block there. - If we are doing for the first time we'll scan only - tail of the LRU list otherwise we scan the whole LRU - list. */ - if (buf_LRU_scan_and_free_block(n_iterations - ? ULINT_UNDEFINED : 100)) { - goto retry; - } + MONITOR_INC(MONITOR_LRU_GET_FREE_LOOPS); + if (waited || buf_pool.try_LRU_scan) + { + /* If no block was in the free list, search from the end of the + LRU list and try to free a block there. If we are doing for the + first time we'll scan only tail of the LRU list otherwise we scan + the whole LRU list. */ + if (buf_LRU_scan_and_free_block(waited ? ULINT_UNDEFINED : 100)) + goto retry; - /* Tell other threads that there is no point - in scanning the LRU list. */ - buf_pool.try_LRU_scan = false; - } + /* Tell other threads that there is no point in scanning the LRU + list. */ + buf_pool.try_LRU_scan= false; + } - for (;;) { - if ((block = buf_LRU_get_free_only()) != nullptr) { - goto got_block; - } - const bool wake = buf_pool.need_LRU_eviction(); - mysql_mutex_unlock(&buf_pool.mutex); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - const auto n_flush = buf_pool.n_flush(); - if (wake && !buf_pool.page_cleaner_active()) { - buf_pool.page_cleaner_wakeup(true); - } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - mysql_mutex_lock(&buf_pool.mutex); - if (!n_flush) { - goto not_found; - } - if (!buf_pool.try_LRU_scan) { - my_cond_wait(&buf_pool.done_free, - &buf_pool.mutex.m_mutex); - } - } + waited= true; -not_found: - if (n_iterations > 1) { - MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS ); - } + while (!(block= buf_LRU_get_free_only())) + { + buf_pool.stat.LRU_waits++; - if (n_iterations == 21 - && srv_buf_pool_old_size == srv_buf_pool_size - && buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) { - IF_DBUG(buf_lru_free_blocks_error_printed = true,); - mysql_mutex_unlock(&buf_pool.mutex); - ib::warn() << "Difficult to find free blocks in the buffer pool" - " (" << n_iterations << " search iterations)! " - << flush_failures << " failed attempts to" - " flush a page!" - " Consider increasing innodb_buffer_pool_size." - " Pending flushes (fsync) log: " - << log_sys.get_pending_flushes() - << "; buffer pool: " - << fil_n_pending_tablespace_flushes - << ". " << os_n_file_reads << " OS file reads, " - << os_n_file_writes << " OS file writes, " - << os_n_fsyncs - << " OS fsyncs."; - mysql_mutex_lock(&buf_pool.mutex); - } + timespec abstime; + set_timespec(abstime, 1); - /* No free block was found: try to flush the LRU list. - The freed blocks will be up for grabs for all threads. + mysql_mutex_lock(&buf_pool.flush_list_mutex); + if (!buf_pool.page_cleaner_active()) + buf_pool.page_cleaner_wakeup(true); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + if (my_cond_timedwait(&buf_pool.done_free, &buf_pool.mutex.m_mutex, + &abstime)) + buf_pool.LRU_warn(); + } - TODO: A more elegant way would have been to return one freed - up block to the caller here but the code that deals with - removing the block from buf_pool.page_hash and buf_pool.LRU is fairly - involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We - can do that in a separate patch sometime in future. */ -#ifndef DBUG_OFF -flush_lru: -#endif - if (!buf_flush_LRU(innodb_lru_flush_size, true)) { - MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); - ++flush_failures; - } - - n_iterations++; - buf_pool.stat.LRU_waits++; - mysql_mutex_unlock(&buf_pool.mutex); - buf_dblwr.flush_buffered_writes(); - goto get_mutex; + goto got_block; } /** Move the LRU_old pointer so that the length of the old blocks list diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 4986e83cfb9..51e30c88a86 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -766,17 +766,16 @@ public: @retval DB_FAIL if the page contains the wrong ID */ dberr_t read_complete(const fil_node_t &node); - /** Note that a block is no longer dirty, while not removing - it from buf_pool.flush_list - @param temporary whether the page belongs to the temporary tablespace - @param error whether an error may have occurred while writing */ - inline void write_complete(bool temporary, bool error); + /** Release a write fix after a page write was completed. + @param persistent whether the page belongs to a persistent tablespace + @param error whether an error may have occurred while writing + @param state recently read state() value with the correct io-fix */ + void write_complete(bool persistent, bool error, uint32_t state); /** Write a flushable page to a file or free a freeable block. - @param evict whether to evict the page on write completion @param space tablespace @return whether a page write was initiated and buf_pool.mutex released */ - bool flush(bool evict, fil_space_t *space); + bool flush(fil_space_t *space); /** Notify that a page in a temporary tablespace has been modified. */ void set_temp_modified() @@ -1740,10 +1739,6 @@ public: /** Decrement the number of pending LRU flush */ inline void n_flush_dec(); - /** Decrement the number of pending LRU flush - while holding flush_list_mutex */ - inline void n_flush_dec_holding_mutex(); - /** @return whether flush_list flushing is active */ bool flush_list_active() const { @@ -1893,6 +1888,9 @@ public: /** Free a page whose underlying file page has been freed. */ ATTRIBUTE_COLD void release_freed_page(buf_page_t *bpage); + /** Issue a warning that we could not free up buffer pool pages. */ + ATTRIBUTE_COLD void LRU_warn(); + private: /** Temporary memory for page_compressed and encrypted I/O */ struct io_buf_t diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 74c79e4acc8..2e25b3aed08 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -85,16 +85,6 @@ buf_flush_init_for_writing( bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr) MY_ATTRIBUTE((warn_unused_result)); -/** Write out dirty blocks from buf_pool.LRU, -and move clean blocks to buf_pool.free. -The caller must invoke buf_dblwr.flush_buffered_writes() -after releasing buf_pool.mutex. -@param max_n wished maximum mumber of blocks flushed -@param evict whether to evict pages after flushing -@return evict ? number of processed pages : number of pages written -@retval 0 if a buf_pool.LRU batch is already running */ -ulint buf_flush_LRU(ulint max_n, bool evict); - /** Wait until a LRU flush batch ends. */ void buf_flush_wait_LRU_batch_end(); /** Wait until all persistent pages are flushed up to a limit. diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index e0f398301e3..7c2a3950265 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -200,14 +200,10 @@ public: WRITE_ASYNC= WRITE_SYNC | 1, /** A doublewrite batch */ DBLWR_BATCH= WRITE_ASYNC | 8, - /** Write data; evict the block on write completion */ - WRITE_LRU= WRITE_ASYNC | 32, /** Write data and punch hole for the rest */ - PUNCH= WRITE_ASYNC | 64, - /** Write data and punch hole; evict the block on write completion */ - PUNCH_LRU= PUNCH | WRITE_LRU, + PUNCH= WRITE_ASYNC | 16, /** Zero out a range of bytes in fil_space_t::io() */ - PUNCH_RANGE= WRITE_SYNC | 128, + PUNCH_RANGE= WRITE_SYNC | 32, }; constexpr IORequest(buf_page_t *bpage, buf_tmp_buffer_t *slot, @@ -220,7 +216,6 @@ public: bool is_read() const { return (type & READ_SYNC) != 0; } bool is_write() const { return (type & WRITE_SYNC) != 0; } - bool is_LRU() const { return (type & (WRITE_LRU ^ WRITE_ASYNC)) != 0; } bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; } void write_complete(int io_error) const; diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 971f6363bdb..23c7edb7137 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -196,7 +196,6 @@ enum monitor_id_t { MONITOR_FLUSH_ADAPTIVE_AVG_PASS, MONITOR_LRU_GET_FREE_LOOPS, - MONITOR_LRU_GET_FREE_WAITS, MONITOR_FLUSH_AVG_PAGE_RATE, MONITOR_FLUSH_LSN_AVG_RATE, @@ -217,7 +216,6 @@ enum monitor_id_t { MONITOR_LRU_BATCH_SCANNED_PER_CALL, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT, MONITOR_LRU_GET_FREE_SEARCH, MONITOR_LRU_SEARCH_SCANNED, MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index e8534318ff3..145e2b04051 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -376,11 +376,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_LOOPS}, - {"buffer_LRU_get_free_waits", "buffer", - "Total sleep waits in LRU get free.", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_WAITS}, - {"buffer_flush_avg_page_rate", "buffer", "Average number of pages at which flushing is happening", MONITOR_NONE, @@ -484,11 +479,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE}, - {"buffer_LRU_single_flush_failure_count", "Buffer", - "Number of times attempt to flush a single page from LRU failed", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT}, - {"buffer_LRU_get_free_search", "Buffer", "Number of searches performed for a clean page", MONITOR_NONE, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result index 52950a41c5e..4a934ce8940 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result @@ -70,7 +70,6 @@ buffer_flush_n_to_flush_by_age buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NUL buffer_flush_adaptive_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently. buffer_flush_adaptive_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of adaptive flushes passed during the recent Avg period. buffer_LRU_get_free_loops buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total loops in LRU get free. -buffer_LRU_get_free_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total sleep waits in LRU get free. buffer_flush_avg_page_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Average number of pages at which flushing is happening buffer_flush_lsn_avg_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Average redo generation rate buffer_flush_pct_for_dirty buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Percent of IO capacity used to avoid max dirty page limit @@ -90,7 +89,6 @@ buffer_LRU_batch_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NUL buffer_LRU_batch_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Pages scanned per LRU batch call buffer_LRU_batch_flush_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Total pages flushed as part of LRU batches buffer_LRU_batch_evict_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Total pages evicted as part of LRU batches -buffer_LRU_single_flush_failure_count Buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of times attempt to flush a single page from LRU failed buffer_LRU_get_free_search Buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of searches performed for a clean page buffer_LRU_search_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_owner Total pages scanned as part of LRU search buffer_LRU_search_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Number of times LRU search is performed