mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MDEV-36226 Stall and crash when page cleaner fails to generate free pages during Async flush
During regular iteration the page cleaner does flush from flush list with some flush target and then goes for generating free pages from LRU tail. When asynchronous flush is triggered i.e. when 7/8 th of the LSN margin is filled in the redo log, the flush target for flush list is set to innodb_io_capacity_max. If it could flush all, the flush bandwidth for LRU flush is currently set to zero. If the LRU tail has dirty pages, page cleaner ends up freeing no pages in one iteration. The scenario could repeat across multiple iterations till async flush target is reached. During this time the DB system is starved of free pages resulting in apparent stall and in some cases dict_sys latch fatal error. Fix: In page cleaner iteration, before LRU flush, ensure we provide enough flush limit so that freeing pages is no blocked by dirty pages in LRU tail. Log IO and flush state if double write flush wait is long. Reviewed by: Marko Mäkelä
This commit is contained in:
@@ -4464,6 +4464,7 @@ sub extract_warning_lines ($$) {
|
||||
qr/InnoDB: innodb_open_files .* should not be greater than/,
|
||||
qr/InnoDB: Trying to delete tablespace.*but there are.*pending/,
|
||||
qr/InnoDB: Tablespace 1[0-9]* was not found at .*, and innodb_force_recovery was set/,
|
||||
qr/InnoDB: Long wait \([0-9]+ seconds\) for double-write buffer flush/,
|
||||
qr/Slave: Unknown table 't1' .* 1051/,
|
||||
qr/Slave SQL:.*(Internal MariaDB error code: [[:digit:]]+|Query:.*)/,
|
||||
qr/slave SQL thread aborted/,
|
||||
|
@@ -603,20 +603,67 @@ static void buf_dblwr_check_block(const buf_page_t *bpage) noexcept
|
||||
}
|
||||
#endif /* UNIV_DEBUG */
|
||||
|
||||
ATTRIBUTE_COLD void buf_dblwr_t::print_info() const noexcept
|
||||
{
|
||||
mysql_mutex_assert_owner(&mutex);
|
||||
const slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
|
||||
|
||||
sql_print_information("InnoDB: Double Write State\n"
|
||||
"-------------------\n"
|
||||
"Batch running : %s\n"
|
||||
"Active Slot - first_free: %zu reserved: %zu\n"
|
||||
"Flush Slot - first_free: %zu reserved: %zu\n"
|
||||
"-------------------",
|
||||
(batch_running ? "true" : "false"),
|
||||
active_slot->first_free, active_slot->reserved,
|
||||
flush_slot->first_free, flush_slot->reserved);
|
||||
}
|
||||
|
||||
bool buf_dblwr_t::flush_buffered_writes(const ulint size) noexcept
|
||||
{
|
||||
mysql_mutex_assert_owner(&mutex);
|
||||
ut_ad(size == block_size());
|
||||
|
||||
for (;;)
|
||||
const size_t max_count= 60 * 60;
|
||||
const size_t first_log_count= 30;
|
||||
const size_t fatal_threshold=
|
||||
static_cast<size_t>(srv_fatal_semaphore_wait_threshold);
|
||||
size_t log_count= first_log_count;
|
||||
|
||||
for (ulong count= 0;;)
|
||||
{
|
||||
if (!active_slot->first_free)
|
||||
return false;
|
||||
if (!batch_running)
|
||||
break;
|
||||
my_cond_wait(&cond, &mutex.m_mutex);
|
||||
}
|
||||
|
||||
timespec abstime;
|
||||
set_timespec(abstime, 1);
|
||||
my_cond_timedwait(&cond, &mutex.m_mutex, &abstime);
|
||||
|
||||
if (count > fatal_threshold)
|
||||
{
|
||||
buf_pool.print_flush_info();
|
||||
print_info();
|
||||
ib::fatal() << "InnoDB: Long wait (" << count
|
||||
<< " seconds) for double-write buffer flush.";
|
||||
}
|
||||
else if (++count < first_log_count && !(count % 5))
|
||||
{
|
||||
sql_print_information("InnoDB: Long wait (%zu seconds) for double-write"
|
||||
" buffer flush.", count);
|
||||
buf_pool.print_flush_info();
|
||||
print_info();
|
||||
}
|
||||
else if (!(count % log_count))
|
||||
{
|
||||
sql_print_warning("InnoDB: Long wait (%zu seconds) for double-write"
|
||||
" buffer flush.", count);
|
||||
buf_pool.print_flush_info();
|
||||
print_info();
|
||||
log_count= log_count >= max_count ? max_count : log_count * 2;
|
||||
}
|
||||
}
|
||||
ut_ad(active_slot->reserved == active_slot->first_free);
|
||||
ut_ad(!flushing_buffered_writes);
|
||||
|
||||
|
@@ -1374,7 +1374,10 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept
|
||||
break;
|
||||
}
|
||||
|
||||
if (neighbors && space->is_rotational())
|
||||
if (neighbors && space->is_rotational() &&
|
||||
/* Skip neighbourhood flush from LRU list if we haven't yet reached
|
||||
half of the free page target. */
|
||||
UT_LIST_GET_LEN(buf_pool.free) * 2 >= free_limit)
|
||||
n->flushed+= buf_flush_try_neighbors(space, page_id, bpage,
|
||||
neighbors == 1,
|
||||
n->flushed, max);
|
||||
@@ -1763,8 +1766,16 @@ static ulint buf_flush_LRU(ulint max_n) noexcept
|
||||
buf_do_LRU_batch(max_n, &n);
|
||||
|
||||
ulint pages= n.flushed;
|
||||
ulint evicted= n.evicted;
|
||||
|
||||
if (n.evicted)
|
||||
/* If we have exhausted flush quota, it is likely we exited before
|
||||
generating enough free pages. Call once more with 0 flush to generate
|
||||
free pages immediately as required. */
|
||||
if (pages >= max_n)
|
||||
buf_do_LRU_batch(0, &n);
|
||||
|
||||
evicted+= n.evicted;
|
||||
if (evicted)
|
||||
{
|
||||
buf_pool.try_LRU_scan= true;
|
||||
pthread_cond_broadcast(&buf_pool.done_free);
|
||||
@@ -2339,6 +2350,11 @@ static void buf_flush_page_cleaner() noexcept
|
||||
DBUG_EXECUTE_IF("ib_page_cleaner_sleep",
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
/* Cover the logging code in debug mode. */
|
||||
buf_pool.print_flush_info();
|
||||
buf_dblwr.lock();
|
||||
buf_dblwr.print_info();
|
||||
buf_dblwr.unlock();
|
||||
});
|
||||
lsn_limit= buf_flush_sync_lsn;
|
||||
|
||||
@@ -2543,6 +2559,10 @@ static void buf_flush_page_cleaner() noexcept
|
||||
|
||||
n= srv_max_io_capacity;
|
||||
n= n >= n_flushed ? n - n_flushed : 0;
|
||||
/* It is critical to generate free pages to keep the system alive. Make
|
||||
sure we are not hindered by dirty pages in LRU tail. */
|
||||
n= std::max<ulint>(n, std::min<ulint>(srv_max_io_capacity,
|
||||
buf_pool.LRU_scan_depth));
|
||||
goto LRU_flush;
|
||||
}
|
||||
|
||||
@@ -2581,10 +2601,11 @@ ATTRIBUTE_COLD void buf_pool_t::LRU_warn() noexcept
|
||||
mysql_mutex_assert_owner(&mutex);
|
||||
try_LRU_scan= false;
|
||||
if (!LRU_warned.test_and_set(std::memory_order_acquire))
|
||||
{
|
||||
sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!"
|
||||
" %zu blocks are in use and %zu free."
|
||||
" Consider increasing innodb_buffer_pool_size.",
|
||||
UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free));
|
||||
" Consider increasing innodb_buffer_pool_size.");
|
||||
buf_pool.print_flush_info();
|
||||
}
|
||||
}
|
||||
|
||||
/** Initialize page_cleaner. */
|
||||
@@ -2664,6 +2685,53 @@ void buf_flush_sync() noexcept
|
||||
thd_wait_end(nullptr);
|
||||
}
|
||||
|
||||
ATTRIBUTE_COLD void buf_pool_t::print_flush_info() const noexcept
|
||||
{
|
||||
/* We do dirty read of UT_LIST count variable. */
|
||||
size_t lru_size= UT_LIST_GET_LEN(LRU);
|
||||
size_t dirty_size= UT_LIST_GET_LEN(flush_list);
|
||||
size_t free_size= UT_LIST_GET_LEN(free);
|
||||
size_t dirty_pct= lru_size ? dirty_size * 100 / (lru_size + free_size) : 0;
|
||||
sql_print_information("InnoDB: Buffer Pool pages\n"
|
||||
"-------------------\n"
|
||||
"LRU Pages : %zu\n"
|
||||
"Free Pages : %zu\n"
|
||||
"Dirty Pages: %zu : %zu%%\n"
|
||||
"-------------------",
|
||||
lru_size, free_size, dirty_size, dirty_pct);
|
||||
|
||||
lsn_t lsn= log_sys.get_lsn();
|
||||
lsn_t clsn= log_sys.last_checkpoint_lsn;
|
||||
sql_print_information("InnoDB: LSN flush parameters\n"
|
||||
"-------------------\n"
|
||||
"System LSN : %" PRIu64 "\n"
|
||||
"Checkpoint LSN: %" PRIu64 "\n"
|
||||
"Flush ASync LSN: %" PRIu64 "\n"
|
||||
"Flush Sync LSN: %" PRIu64 "\n"
|
||||
"-------------------",
|
||||
lsn, clsn, buf_flush_async_lsn.load(), buf_flush_sync_lsn.load());
|
||||
|
||||
lsn_t age= lsn - clsn;
|
||||
lsn_t age_pct= log_sys.max_checkpoint_age
|
||||
? age * 100 / log_sys.max_checkpoint_age : 0;
|
||||
sql_print_information("InnoDB: LSN age parameters\n"
|
||||
"-------------------\n"
|
||||
"Current Age : %" PRIu64 " : %" PRIu64 "%%\n"
|
||||
"Max Age(Async): %" PRIu64 "\n"
|
||||
"Max Age(Sync) : %" PRIu64 "\n"
|
||||
"Capacity : %" PRIu64 "\n"
|
||||
"-------------------",
|
||||
age, age_pct, log_sys.max_modified_age_async, log_sys.max_checkpoint_age,
|
||||
log_sys.log_capacity);
|
||||
|
||||
sql_print_information("InnoDB: Pending IO count\n"
|
||||
"-------------------\n"
|
||||
"Pending Read : %zu\n"
|
||||
"Pending Write: %zu\n"
|
||||
"-------------------",
|
||||
os_aio_pending_reads_approx(), os_aio_pending_writes_approx());
|
||||
}
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
/** Functor to validate the flush list. */
|
||||
struct Check {
|
||||
|
@@ -44,6 +44,7 @@ Created 1/8/1996 Heikki Tuuri
|
||||
#include "btr0cur.h"
|
||||
#include "btr0sea.h"
|
||||
#include "buf0buf.h"
|
||||
#include "buf0flu.h"
|
||||
#include "data0type.h"
|
||||
#include "dict0boot.h"
|
||||
#include "dict0load.h"
|
||||
@@ -1024,7 +1025,10 @@ void dict_sys_t::lock_wait(SRW_LOCK_ARGS(const char *file, unsigned line)) noexc
|
||||
const ulong threshold= srv_fatal_semaphore_wait_threshold;
|
||||
|
||||
if (waited >= threshold)
|
||||
{
|
||||
buf_pool.print_flush_info();
|
||||
ib::fatal() << fatal_msg;
|
||||
}
|
||||
|
||||
if (waited > threshold / 4)
|
||||
ib::warn() << "A long wait (" << waited
|
||||
|
@@ -1951,6 +1951,9 @@ public:
|
||||
/** Issue a warning that we could not free up buffer pool pages. */
|
||||
ATTRIBUTE_COLD void LRU_warn() noexcept;
|
||||
|
||||
/** Print buffer pool flush state information. */
|
||||
ATTRIBUTE_COLD void print_flush_info() const noexcept;
|
||||
|
||||
private:
|
||||
/** Temporary memory for page_compressed and encrypted I/O */
|
||||
struct io_buf_t
|
||||
|
@@ -159,6 +159,9 @@ public:
|
||||
my_cond_wait(&cond, &mutex.m_mutex);
|
||||
mysql_mutex_unlock(&mutex);
|
||||
}
|
||||
|
||||
/** Print double write state information. */
|
||||
ATTRIBUTE_COLD void print_info() const noexcept;
|
||||
};
|
||||
|
||||
/** The doublewrite buffer */
|
||||
|
@@ -1021,6 +1021,8 @@ size_t os_aio_pending_reads() noexcept;
|
||||
size_t os_aio_pending_reads_approx() noexcept;
|
||||
/** @return number of pending writes */
|
||||
size_t os_aio_pending_writes() noexcept;
|
||||
/** @return approximate number of pending writes */
|
||||
size_t os_aio_pending_writes_approx() noexcept;
|
||||
|
||||
/** Wait until there are no pending asynchronous writes.
|
||||
@param declare whether the wait will be declared in tpool */
|
||||
|
@@ -3481,6 +3481,12 @@ size_t os_aio_pending_writes() noexcept
|
||||
return pending;
|
||||
}
|
||||
|
||||
/** @return approximate number of pending writes */
|
||||
size_t os_aio_pending_writes_approx() noexcept
|
||||
{
|
||||
return write_slots->pending_io_count();
|
||||
}
|
||||
|
||||
/** Wait until all pending asynchronous reads have completed.
|
||||
@param declare whether the wait will be declared in tpool */
|
||||
void os_aio_wait_until_no_pending_reads(bool declare) noexcept
|
||||
|
@@ -1220,6 +1220,7 @@ void srv_monitor_task(void*)
|
||||
now -= start;
|
||||
ulong waited = static_cast<ulong>(now / 1000000);
|
||||
if (waited >= threshold) {
|
||||
buf_pool.print_flush_info();
|
||||
ib::fatal() << dict_sys.fatal_msg;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user