diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index a47be67a050..3694ed31c01 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -4464,6 +4464,7 @@ sub extract_warning_lines ($$) { qr/InnoDB: innodb_open_files .* should not be greater than/, qr/InnoDB: Trying to delete tablespace.*but there are.*pending/, qr/InnoDB: Tablespace 1[0-9]* was not found at .*, and innodb_force_recovery was set/, + qr/InnoDB: Long wait \([0-9]+ seconds\) for double-write buffer flush/, qr/Slave: Unknown table 't1' .* 1051/, qr/Slave SQL:.*(Internal MariaDB error code: [[:digit:]]+|Query:.*)/, qr/slave SQL thread aborted/, diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index b04460a020f..8cc44eb7972 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -603,20 +603,67 @@ static void buf_dblwr_check_block(const buf_page_t *bpage) noexcept } #endif /* UNIV_DEBUG */ +ATTRIBUTE_COLD void buf_dblwr_t::print_info() const noexcept +{ + mysql_mutex_assert_owner(&mutex); + const slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0]; + + sql_print_information("InnoDB: Double Write State\n" + "-------------------\n" + "Batch running : %s\n" + "Active Slot - first_free: %zu reserved: %zu\n" + "Flush Slot - first_free: %zu reserved: %zu\n" + "-------------------", + (batch_running ? "true" : "false"), + active_slot->first_free, active_slot->reserved, + flush_slot->first_free, flush_slot->reserved); +} + bool buf_dblwr_t::flush_buffered_writes(const ulint size) noexcept { mysql_mutex_assert_owner(&mutex); ut_ad(size == block_size()); - for (;;) + const size_t max_count= 60 * 60; + const size_t first_log_count= 30; + const size_t fatal_threshold= + static_cast(srv_fatal_semaphore_wait_threshold); + size_t log_count= first_log_count; + + for (ulong count= 0;;) { if (!active_slot->first_free) return false; if (!batch_running) break; - my_cond_wait(&cond, &mutex.m_mutex); - } + timespec abstime; + set_timespec(abstime, 1); + my_cond_timedwait(&cond, &mutex.m_mutex, &abstime); + + if (count > fatal_threshold) + { + buf_pool.print_flush_info(); + print_info(); + ib::fatal() << "InnoDB: Long wait (" << count + << " seconds) for double-write buffer flush."; + } + else if (++count < first_log_count && !(count % 5)) + { + sql_print_information("InnoDB: Long wait (%zu seconds) for double-write" + " buffer flush.", count); + buf_pool.print_flush_info(); + print_info(); + } + else if (!(count % log_count)) + { + sql_print_warning("InnoDB: Long wait (%zu seconds) for double-write" + " buffer flush.", count); + buf_pool.print_flush_info(); + print_info(); + log_count= log_count >= max_count ? max_count : log_count * 2; + } + } ut_ad(active_slot->reserved == active_slot->first_free); ut_ad(!flushing_buffered_writes); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index edeb210f6f7..71ee98b18be 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1374,7 +1374,10 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept break; } - if (neighbors && space->is_rotational()) + if (neighbors && space->is_rotational() && + /* Skip neighbourhood flush from LRU list if we haven't yet reached + half of the free page target. */ + UT_LIST_GET_LEN(buf_pool.free) * 2 >= free_limit) n->flushed+= buf_flush_try_neighbors(space, page_id, bpage, neighbors == 1, n->flushed, max); @@ -1763,8 +1766,16 @@ static ulint buf_flush_LRU(ulint max_n) noexcept buf_do_LRU_batch(max_n, &n); ulint pages= n.flushed; + ulint evicted= n.evicted; - if (n.evicted) + /* If we have exhausted flush quota, it is likely we exited before + generating enough free pages. Call once more with 0 flush to generate + free pages immediately as required. */ + if (pages >= max_n) + buf_do_LRU_batch(0, &n); + + evicted+= n.evicted; + if (evicted) { buf_pool.try_LRU_scan= true; pthread_cond_broadcast(&buf_pool.done_free); @@ -2339,6 +2350,11 @@ static void buf_flush_page_cleaner() noexcept DBUG_EXECUTE_IF("ib_page_cleaner_sleep", { std::this_thread::sleep_for(std::chrono::seconds(1)); + /* Cover the logging code in debug mode. */ + buf_pool.print_flush_info(); + buf_dblwr.lock(); + buf_dblwr.print_info(); + buf_dblwr.unlock(); }); lsn_limit= buf_flush_sync_lsn; @@ -2543,6 +2559,10 @@ static void buf_flush_page_cleaner() noexcept n= srv_max_io_capacity; n= n >= n_flushed ? n - n_flushed : 0; + /* It is critical to generate free pages to keep the system alive. Make + sure we are not hindered by dirty pages in LRU tail. */ + n= std::max(n, std::min(srv_max_io_capacity, + buf_pool.LRU_scan_depth)); goto LRU_flush; } @@ -2581,10 +2601,11 @@ ATTRIBUTE_COLD void buf_pool_t::LRU_warn() noexcept mysql_mutex_assert_owner(&mutex); try_LRU_scan= false; if (!LRU_warned.test_and_set(std::memory_order_acquire)) + { sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" - " %zu blocks are in use and %zu free." - " Consider increasing innodb_buffer_pool_size.", - UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free)); + " Consider increasing innodb_buffer_pool_size."); + buf_pool.print_flush_info(); + } } /** Initialize page_cleaner. */ @@ -2664,6 +2685,53 @@ void buf_flush_sync() noexcept thd_wait_end(nullptr); } +ATTRIBUTE_COLD void buf_pool_t::print_flush_info() const noexcept +{ + /* We do dirty read of UT_LIST count variable. */ + size_t lru_size= UT_LIST_GET_LEN(LRU); + size_t dirty_size= UT_LIST_GET_LEN(flush_list); + size_t free_size= UT_LIST_GET_LEN(free); + size_t dirty_pct= lru_size ? dirty_size * 100 / (lru_size + free_size) : 0; + sql_print_information("InnoDB: Buffer Pool pages\n" + "-------------------\n" + "LRU Pages : %zu\n" + "Free Pages : %zu\n" + "Dirty Pages: %zu : %zu%%\n" + "-------------------", + lru_size, free_size, dirty_size, dirty_pct); + + lsn_t lsn= log_sys.get_lsn(); + lsn_t clsn= log_sys.last_checkpoint_lsn; + sql_print_information("InnoDB: LSN flush parameters\n" + "-------------------\n" + "System LSN : %" PRIu64 "\n" + "Checkpoint LSN: %" PRIu64 "\n" + "Flush ASync LSN: %" PRIu64 "\n" + "Flush Sync LSN: %" PRIu64 "\n" + "-------------------", + lsn, clsn, buf_flush_async_lsn.load(), buf_flush_sync_lsn.load()); + + lsn_t age= lsn - clsn; + lsn_t age_pct= log_sys.max_checkpoint_age + ? age * 100 / log_sys.max_checkpoint_age : 0; + sql_print_information("InnoDB: LSN age parameters\n" + "-------------------\n" + "Current Age : %" PRIu64 " : %" PRIu64 "%%\n" + "Max Age(Async): %" PRIu64 "\n" + "Max Age(Sync) : %" PRIu64 "\n" + "Capacity : %" PRIu64 "\n" + "-------------------", + age, age_pct, log_sys.max_modified_age_async, log_sys.max_checkpoint_age, + log_sys.log_capacity); + + sql_print_information("InnoDB: Pending IO count\n" + "-------------------\n" + "Pending Read : %zu\n" + "Pending Write: %zu\n" + "-------------------", + os_aio_pending_reads_approx(), os_aio_pending_writes_approx()); +} + #ifdef UNIV_DEBUG /** Functor to validate the flush list. */ struct Check { diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index a08b8812e6d..4f293c41854 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -44,6 +44,7 @@ Created 1/8/1996 Heikki Tuuri #include "btr0cur.h" #include "btr0sea.h" #include "buf0buf.h" +#include "buf0flu.h" #include "data0type.h" #include "dict0boot.h" #include "dict0load.h" @@ -1024,7 +1025,10 @@ void dict_sys_t::lock_wait(SRW_LOCK_ARGS(const char *file, unsigned line)) noexc const ulong threshold= srv_fatal_semaphore_wait_threshold; if (waited >= threshold) + { + buf_pool.print_flush_info(); ib::fatal() << fatal_msg; + } if (waited > threshold / 4) ib::warn() << "A long wait (" << waited diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index eed41113659..605ce772b37 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1951,6 +1951,9 @@ public: /** Issue a warning that we could not free up buffer pool pages. */ ATTRIBUTE_COLD void LRU_warn() noexcept; + /** Print buffer pool flush state information. */ + ATTRIBUTE_COLD void print_flush_info() const noexcept; + private: /** Temporary memory for page_compressed and encrypted I/O */ struct io_buf_t diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index 2737622a4d4..cef5d91214a 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -159,6 +159,9 @@ public: my_cond_wait(&cond, &mutex.m_mutex); mysql_mutex_unlock(&mutex); } + + /** Print double write state information. */ + ATTRIBUTE_COLD void print_info() const noexcept; }; /** The doublewrite buffer */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 837d22f2d3b..ab25ba28bd4 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1021,6 +1021,8 @@ size_t os_aio_pending_reads() noexcept; size_t os_aio_pending_reads_approx() noexcept; /** @return number of pending writes */ size_t os_aio_pending_writes() noexcept; +/** @return approximate number of pending writes */ +size_t os_aio_pending_writes_approx() noexcept; /** Wait until there are no pending asynchronous writes. @param declare whether the wait will be declared in tpool */ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index abfc0d939eb..416a3f8f130 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -3481,6 +3481,12 @@ size_t os_aio_pending_writes() noexcept return pending; } +/** @return approximate number of pending writes */ +size_t os_aio_pending_writes_approx() noexcept +{ + return write_slots->pending_io_count(); +} + /** Wait until all pending asynchronous reads have completed. @param declare whether the wait will be declared in tpool */ void os_aio_wait_until_no_pending_reads(bool declare) noexcept diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index cc35a659a71..a6a588ab729 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1220,6 +1220,7 @@ void srv_monitor_task(void*) now -= start; ulong waited = static_cast(now / 1000000); if (waited >= threshold) { + buf_pool.print_flush_info(); ib::fatal() << dict_sys.fatal_msg; }