mirror of
https://github.com/MariaDB/server.git
synced 2025-08-05 13:16:09 +03:00
MDEV-34265 Possible hang during IO burst with innodb_flush_sync enabled
When the checkpoint age goes beyond the sync flush threshold and buf_flush_sync_lsn is set, the page cleaner enters the "furious flush" stage to aggressively flush dirty pages from the flush list and pull the checkpoint LSN above the safe margin. In this stage, the page cleaner skips LRU flushing and eviction. In 10.6, all other threads rely entirely on the page cleaner to generate free pages. If free pages run out while the page cleaner is busy in the "furious flush" stage, a session thread could wait for a free page in the middle of a mini-transaction (mtr) while holding latches on other pages. This, in turn, can prevent the page cleaner from flushing those pages, which prevents the checkpoint LSN from moving forward and creates a deadlock. Even without a deadlock, it could cause a stall or a hang-like situation for a large buffer pool (BP) with plenty of dirty pages to flush before the stage can finish. Fix: During furious flush, check and evict LRU pages after each flush iteration.
This commit is contained in:
@@ -268,4 +268,17 @@ PARTITION BY KEY(a) PARTITIONS 16;
|
||||
INSERT INTO t1 VALUES(1);
|
||||
UPDATE t1 SET a = 2 WHERE a = 1;
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-34265 Possible hang during IO burst with innodb_flush_sync enabled
|
||||
#
|
||||
CREATE TABLE t1(f1 MEDIUMTEXT)ENGINE=InnoDB;
|
||||
SET @save_dbug=@@GLOBAL.debug_dbug;
|
||||
SET @@GLOBAL.debug_dbug='+d,ib_page_cleaner_sleep';
|
||||
SET STATEMENT debug_dbug='+d,ib_free_page_sleep' FOR
|
||||
INSERT INTO t1 VALUES(REPEAT(1, 8459264));
|
||||
SET @@GLOBAL.debug_dbug=@save_dbug;
|
||||
SELECT length(f1) FROM t1;
|
||||
length(f1)
|
||||
8459264
|
||||
DROP TABLE t1;
|
||||
# End of 10.6 tests
|
||||
|
@@ -163,9 +163,9 @@ DROP TABLE t1,t2;
|
||||
|
||||
CREATE TABLE t1 (a INT KEY) ENGINE=InnoDB;
|
||||
|
||||
--error 0,1193
|
||||
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
|
||||
SET @save_limit = @@GLOBAL.innodb_limit_optimistic_insert_debug;
|
||||
--error 0,1193
|
||||
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
|
||||
SET GLOBAL innodb_limit_optimistic_insert_debug = 2;
|
||||
|
||||
BEGIN;
|
||||
@@ -289,4 +289,30 @@ CREATE TABLE t1(a INT KEY)ENGINE=InnoDB
|
||||
INSERT INTO t1 VALUES(1);
|
||||
UPDATE t1 SET a = 2 WHERE a = 1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-34265 Possible hang during IO burst with innodb_flush_sync enabled
|
||||
--echo #
|
||||
CREATE TABLE t1(f1 MEDIUMTEXT)ENGINE=InnoDB;
|
||||
|
||||
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
|
||||
SET @save_dbug=@@GLOBAL.debug_dbug;
|
||||
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
|
||||
SET @@GLOBAL.debug_dbug='+d,ib_page_cleaner_sleep';
|
||||
|
||||
if ($have_debug) {
|
||||
SET STATEMENT debug_dbug='+d,ib_free_page_sleep' FOR
|
||||
INSERT INTO t1 VALUES(REPEAT(1, 8459264));
|
||||
}
|
||||
if (!$have_debug) {
|
||||
--echo SET STATEMENT debug_dbug='+d,ib_free_page_sleep' FOR
|
||||
INSERT INTO t1 VALUES(REPEAT(1, 8459264));
|
||||
}
|
||||
|
||||
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
|
||||
SET @@GLOBAL.debug_dbug=@save_dbug;
|
||||
|
||||
SELECT length(f1) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo # End of 10.6 tests
|
||||
|
@@ -2025,13 +2025,43 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
|
||||
/** Conduct checkpoint-related flushing for innodb_flush_sync=ON,
|
||||
and try to initiate checkpoints until the target is met.
|
||||
@param lsn minimum value of buf_pool.get_oldest_modification(LSN_MAX) */
|
||||
ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
|
||||
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
|
||||
static void buf_flush_sync_for_checkpoint(lsn_t lsn)
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
|
||||
|
||||
for (;;)
|
||||
/* During furious flush, we need to keep generating free pages. Otherwise
|
||||
concurrent mtrs could be blocked holding latches for the pages to be flushed
|
||||
causing deadlock in rare occasion.
|
||||
|
||||
Ideally we should be acquiring buffer pool mutex for the check but it is more
|
||||
expensive and we are not using the mutex while calling need_LRU_eviction() as
|
||||
of today. It is a quick and dirty read of the LRU and free list length.
|
||||
Atomic read of try_LRU_scan should eventually let us do the eviction.
|
||||
Correcting the inaccuracy would need more consideration to avoid any possible
|
||||
performance regression. */
|
||||
if (buf_pool.need_LRU_eviction())
|
||||
{
|
||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||
buf_pool.page_cleaner_set_idle(false);
|
||||
buf_pool.n_flush_inc();
|
||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
/* Confirm that eviction is needed after acquiring buffer pool mutex. */
|
||||
if (buf_pool.need_LRU_eviction())
|
||||
/* We intend to only evict pages keeping maximum flush bandwidth for
|
||||
flush list pages advancing checkpoint. However, if the LRU tail is full
|
||||
of dirty pages, we might need some flushing. */
|
||||
std::ignore= buf_flush_LRU(srv_io_capacity);
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
|
||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||
buf_pool.n_flush_dec();
|
||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||
}
|
||||
|
||||
if (ulint n_flushed= buf_flush_list(srv_max_io_capacity, lsn))
|
||||
{
|
||||
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
|
||||
@@ -2083,12 +2113,6 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
|
||||
/* wake up buf_flush_wait() */
|
||||
pthread_cond_broadcast(&buf_pool.done_flush_list);
|
||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||
|
||||
lsn= std::max(lsn, target);
|
||||
|
||||
if (measure >= lsn)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/** Check if the adaptive flushing threshold is recommended based on
|
||||
@@ -2298,6 +2322,10 @@ static void buf_flush_page_cleaner()
|
||||
|
||||
for (;;)
|
||||
{
|
||||
DBUG_EXECUTE_IF("ib_page_cleaner_sleep",
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
});
|
||||
lsn_limit= buf_flush_sync_lsn;
|
||||
|
||||
if (UNIV_UNLIKELY(lsn_limit != 0) && UNIV_LIKELY(srv_flush_sync))
|
||||
|
@@ -406,17 +406,32 @@ got_block:
|
||||
ut_ad(LRU_size <= BUF_LRU_MIN_LEN ||
|
||||
available >= scan_depth || buf_pool.need_LRU_eviction());
|
||||
|
||||
ut_d(bool signalled = false);
|
||||
|
||||
if (UNIV_UNLIKELY(available < scan_depth) && LRU_size > BUF_LRU_MIN_LEN)
|
||||
{
|
||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||
if (!buf_pool.page_cleaner_active())
|
||||
{
|
||||
buf_pool.page_cleaner_wakeup(true);
|
||||
ut_d(signalled = true);
|
||||
}
|
||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||
}
|
||||
|
||||
if (!have_mutex)
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
|
||||
DBUG_EXECUTE_IF("ib_free_page_sleep",
|
||||
{
|
||||
static bool do_sleep = true;
|
||||
if (do_sleep && signalled)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
do_sleep = false;
|
||||
}
|
||||
});
|
||||
|
||||
block->page.zip.clear();
|
||||
return block;
|
||||
}
|
||||
|
Reference in New Issue
Block a user