mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-24109 InnoDB hangs with innodb_flush_sync=OFF
MDEV-23855 broke the handling of innodb_flush_sync=OFF. That parameter is supposed to limit the page write rate in case the log capacity is being exceeded and log checkpoints are needed.

With this fix, the following should pass:

    ./mtr --mysqld=--loose-innodb-flush-sync=0

One of our best regression tests for page flushing is encryption.innochecksum. With innodb_page_size=16k and innodb_flush_sync=OFF it would likely hang without this fix.

log_sys.last_checkpoint_lsn: Declare as Atomic_relaxed<lsn_t> so that we are allowed to read the value while not holding log_sys.mutex.

buf_flush_wait_flushed(): Let the page cleaner perform the flushing also if innodb_flush_sync=OFF. After the page cleaner has completed, perform a checkpoint if it is needed, because buf_flush_sync_for_checkpoint() will not be run if innodb_flush_sync=OFF.

buf_flush_ahead(): Simplify the condition. We do not really care whether buf_flush_page_cleaner() is running.

buf_flush_page_cleaner(): Evaluate innodb_flush_sync at the low level. If innodb_flush_sync=OFF, rate-limit the batches to innodb_io_capacity_max pages per second.

Reviewed by: Vladislav Vaintroub
This commit is contained in:
@ -13,6 +13,7 @@
|
|||||||
along with this program; if not, write to the Free Software
|
along with this program; if not, write to the Free Software
|
||||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
/**
|
/**
|
||||||
|
@ -3,9 +3,11 @@
|
|||||||
|
|
||||||
[strict_crc32]
|
[strict_crc32]
|
||||||
--innodb-checksum-algorithm=strict_crc32
|
--innodb-checksum-algorithm=strict_crc32
|
||||||
|
--innodb-flush-sync=OFF
|
||||||
|
|
||||||
[full_crc32]
|
[full_crc32]
|
||||||
--innodb-checksum-algorithm=full_crc32
|
--innodb-checksum-algorithm=full_crc32
|
||||||
|
|
||||||
[strict_full_crc32]
|
[strict_full_crc32]
|
||||||
--innodb-checksum-algorithm=strict_full_crc32
|
--innodb-checksum-algorithm=strict_full_crc32
|
||||||
|
--innodb-flush-sync=OFF
|
||||||
|
@ -1,16 +1,11 @@
|
|||||||
SET @start_global_value = @@global.innodb_flush_sync;
|
SET @start_global_value = @@global.innodb_flush_sync;
|
||||||
SELECT @start_global_value;
|
|
||||||
@start_global_value
|
|
||||||
1
|
|
||||||
Valid values are 'ON' and 'OFF'
|
Valid values are 'ON' and 'OFF'
|
||||||
select @@global.innodb_flush_sync in (0, 1);
|
select @@global.innodb_flush_sync in (0, 1);
|
||||||
@@global.innodb_flush_sync in (0, 1)
|
@@global.innodb_flush_sync in (0, 1)
|
||||||
1
|
1
|
||||||
select @@global.innodb_flush_sync;
|
|
||||||
@@global.innodb_flush_sync
|
|
||||||
1
|
|
||||||
select @@session.innodb_flush_sync;
|
select @@session.innodb_flush_sync;
|
||||||
ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable
|
ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable
|
||||||
|
SET GLOBAL innodb_flush_sync = ON;
|
||||||
show global variables like 'innodb_flush_sync';
|
show global variables like 'innodb_flush_sync';
|
||||||
Variable_name Value
|
Variable_name Value
|
||||||
innodb_flush_sync ON
|
innodb_flush_sync ON
|
||||||
@ -87,6 +82,3 @@ INNODB_FLUSH_SYNC ON
|
|||||||
set global innodb_flush_sync='AUTO';
|
set global innodb_flush_sync='AUTO';
|
||||||
ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO'
|
ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO'
|
||||||
SET @@global.innodb_flush_sync = @start_global_value;
|
SET @@global.innodb_flush_sync = @start_global_value;
|
||||||
SELECT @@global.innodb_flush_sync;
|
|
||||||
@@global.innodb_flush_sync
|
|
||||||
1
|
|
||||||
|
@ -1,16 +1,15 @@
|
|||||||
--source include/have_innodb.inc
|
--source include/have_innodb.inc
|
||||||
|
|
||||||
SET @start_global_value = @@global.innodb_flush_sync;
|
SET @start_global_value = @@global.innodb_flush_sync;
|
||||||
SELECT @start_global_value;
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# exists as global only
|
# exists as global only
|
||||||
#
|
#
|
||||||
--echo Valid values are 'ON' and 'OFF'
|
--echo Valid values are 'ON' and 'OFF'
|
||||||
select @@global.innodb_flush_sync in (0, 1);
|
select @@global.innodb_flush_sync in (0, 1);
|
||||||
select @@global.innodb_flush_sync;
|
|
||||||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
||||||
select @@session.innodb_flush_sync;
|
select @@session.innodb_flush_sync;
|
||||||
|
SET GLOBAL innodb_flush_sync = ON;
|
||||||
show global variables like 'innodb_flush_sync';
|
show global variables like 'innodb_flush_sync';
|
||||||
show session variables like 'innodb_flush_sync';
|
show session variables like 'innodb_flush_sync';
|
||||||
--disable_warnings
|
--disable_warnings
|
||||||
@ -18,9 +17,6 @@ select * from information_schema.global_variables where variable_name='innodb_fl
|
|||||||
select * from information_schema.session_variables where variable_name='innodb_flush_sync';
|
select * from information_schema.session_variables where variable_name='innodb_flush_sync';
|
||||||
--enable_warnings
|
--enable_warnings
|
||||||
|
|
||||||
#
|
|
||||||
# show that it's writable
|
|
||||||
#
|
|
||||||
set global innodb_flush_sync='OFF';
|
set global innodb_flush_sync='OFF';
|
||||||
select @@global.innodb_flush_sync;
|
select @@global.innodb_flush_sync;
|
||||||
--disable_warnings
|
--disable_warnings
|
||||||
@ -74,4 +70,3 @@ set global innodb_flush_sync='AUTO';
|
|||||||
#
|
#
|
||||||
|
|
||||||
SET @@global.innodb_flush_sync = @start_global_value;
|
SET @@global.innodb_flush_sync = @start_global_value;
|
||||||
SELECT @@global.innodb_flush_sync;
|
|
||||||
|
@ -1681,52 +1681,55 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
|
|||||||
|
|
||||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||||
|
|
||||||
|
if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
|
||||||
|
{
|
||||||
#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
|
#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
|
||||||
if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
|
if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
|
||||||
ut_d(|| innodb_page_cleaner_disabled_debug))
|
ut_d(|| innodb_page_cleaner_disabled_debug))
|
||||||
{
|
|
||||||
for (;;)
|
|
||||||
{
|
{
|
||||||
const lsn_t lsn= buf_pool.get_oldest_modification(sync_lsn);
|
do
|
||||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
|
||||||
if (lsn >= sync_lsn)
|
|
||||||
return;
|
|
||||||
ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
|
|
||||||
buf_flush_wait_batch_end_acquiring_mutex(false);
|
|
||||||
if (n_pages)
|
|
||||||
{
|
{
|
||||||
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||||
MONITOR_FLUSH_SYNC_COUNT,
|
ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
|
||||||
MONITOR_FLUSH_SYNC_PAGES, n_pages);
|
buf_flush_wait_batch_end_acquiring_mutex(false);
|
||||||
log_checkpoint();
|
if (n_pages)
|
||||||
|
{
|
||||||
|
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
|
||||||
|
MONITOR_FLUSH_SYNC_COUNT,
|
||||||
|
MONITOR_FLUSH_SYNC_PAGES, n_pages);
|
||||||
|
}
|
||||||
|
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
|
||||||
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||||
}
|
}
|
||||||
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
|
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
|
||||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
|
||||||
|
goto try_checkpoint;
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
else if (UNIV_LIKELY(srv_flush_sync))
|
|
||||||
#endif
|
#endif
|
||||||
{
|
|
||||||
if (buf_flush_sync_lsn < sync_lsn)
|
if (buf_flush_sync_lsn < sync_lsn)
|
||||||
{
|
{
|
||||||
buf_flush_sync_lsn= sync_lsn;
|
buf_flush_sync_lsn= sync_lsn;
|
||||||
mysql_cond_signal(&buf_pool.do_flush_list);
|
mysql_cond_signal(&buf_pool.do_flush_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
tpool::tpool_wait_begin();
|
||||||
|
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
|
||||||
|
mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex);
|
||||||
|
thd_wait_end(nullptr);
|
||||||
|
tpool::tpool_wait_end();
|
||||||
|
|
||||||
|
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
|
||||||
|
}
|
||||||
|
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
|
try_checkpoint:
|
||||||
{
|
|
||||||
tpool::tpool_wait_begin();
|
|
||||||
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
|
|
||||||
mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex);
|
|
||||||
thd_wait_end(nullptr);
|
|
||||||
tpool::tpool_wait_end();
|
|
||||||
|
|
||||||
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
|
|
||||||
}
|
|
||||||
|
|
||||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||||
|
|
||||||
|
if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn))
|
||||||
|
log_checkpoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If innodb_flush_sync=ON, initiate a furious flush.
|
/** If innodb_flush_sync=ON, initiate a furious flush.
|
||||||
@ -1739,8 +1742,7 @@ void buf_flush_ahead(lsn_t lsn)
|
|||||||
if (recv_recovery_is_on())
|
if (recv_recovery_is_on())
|
||||||
recv_sys.apply(true);
|
recv_sys.apply(true);
|
||||||
|
|
||||||
if (buf_flush_sync_lsn < lsn &&
|
if (buf_flush_sync_lsn < lsn)
|
||||||
UNIV_LIKELY(srv_flush_sync) && UNIV_LIKELY(buf_page_cleaner_is_active))
|
|
||||||
{
|
{
|
||||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||||
if (buf_flush_sync_lsn < lsn)
|
if (buf_flush_sync_lsn < lsn)
|
||||||
@ -2054,13 +2056,15 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
|
|||||||
if (UNIV_UNLIKELY(lsn_limit != 0))
|
if (UNIV_UNLIKELY(lsn_limit != 0))
|
||||||
{
|
{
|
||||||
furious_flush:
|
furious_flush:
|
||||||
buf_flush_sync_for_checkpoint(lsn_limit);
|
if (UNIV_LIKELY(srv_flush_sync))
|
||||||
last_pages= 0;
|
{
|
||||||
set_timespec(abstime, 1);
|
buf_flush_sync_for_checkpoint(lsn_limit);
|
||||||
continue;
|
last_pages= 0;
|
||||||
|
set_timespec(abstime, 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
|
||||||
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
|
mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
|
||||||
@ -2070,15 +2074,25 @@ furious_flush:
|
|||||||
lsn_limit= buf_flush_sync_lsn;
|
lsn_limit= buf_flush_sync_lsn;
|
||||||
|
|
||||||
if (UNIV_UNLIKELY(lsn_limit != 0))
|
if (UNIV_UNLIKELY(lsn_limit != 0))
|
||||||
goto furious_flush;
|
{
|
||||||
|
if (UNIV_LIKELY(srv_flush_sync))
|
||||||
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
|
goto furious_flush;
|
||||||
|
}
|
||||||
|
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
|
const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
|
||||||
|
|
||||||
if (!dirty_blocks)
|
if (!dirty_blocks)
|
||||||
|
{
|
||||||
|
if (UNIV_UNLIKELY(lsn_limit != 0))
|
||||||
|
{
|
||||||
|
buf_flush_sync_lsn= 0;
|
||||||
|
/* wake up buf_flush_wait_flushed() */
|
||||||
|
mysql_cond_broadcast(&buf_pool.done_flush_list);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* We perform dirty reads of the LRU+free list lengths here.
|
/* We perform dirty reads of the LRU+free list lengths here.
|
||||||
Division by zero is not possible, because buf_pool.flush_list is
|
Division by zero is not possible, because buf_pool.flush_list is
|
||||||
@ -2086,19 +2100,29 @@ furious_flush:
|
|||||||
const double dirty_pct= double(dirty_blocks) * 100.0 /
|
const double dirty_pct= double(dirty_blocks) * 100.0 /
|
||||||
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
|
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
|
||||||
|
|
||||||
if (dirty_pct < srv_max_dirty_pages_pct_lwm)
|
if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
|
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
|
||||||
|
|
||||||
|
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
|
||||||
|
buf_flush_sync_lsn= 0;
|
||||||
|
|
||||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||||
|
|
||||||
ulint n_flushed;
|
ulint n_flushed;
|
||||||
|
|
||||||
if (!srv_adaptive_flushing)
|
if (UNIV_UNLIKELY(lsn_limit != 0))
|
||||||
|
{
|
||||||
|
n_flushed= buf_flush_lists(srv_max_io_capacity, lsn_limit);
|
||||||
|
/* wake up buf_flush_wait_flushed() */
|
||||||
|
mysql_cond_broadcast(&buf_pool.done_flush_list);
|
||||||
|
goto try_checkpoint;
|
||||||
|
}
|
||||||
|
else if (!srv_adaptive_flushing)
|
||||||
{
|
{
|
||||||
n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX);
|
n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX);
|
||||||
|
try_checkpoint:
|
||||||
if (n_flushed)
|
if (n_flushed)
|
||||||
{
|
{
|
||||||
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
|
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
|
||||||
|
@ -37,7 +37,7 @@ Created 12/9/1995 Heikki Tuuri
|
|||||||
#include "log0types.h"
|
#include "log0types.h"
|
||||||
#include "os0file.h"
|
#include "os0file.h"
|
||||||
#include "span.h"
|
#include "span.h"
|
||||||
#include <atomic>
|
#include "my_atomic_wrapper.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -615,8 +615,8 @@ public:
|
|||||||
new query step is started */
|
new query step is started */
|
||||||
ib_uint64_t next_checkpoint_no;
|
ib_uint64_t next_checkpoint_no;
|
||||||
/*!< next checkpoint number */
|
/*!< next checkpoint number */
|
||||||
lsn_t last_checkpoint_lsn;
|
/** latest completed checkpoint (writes are protected by log_sys.mutex; may be read without holding it) */
|
||||||
/*!< latest checkpoint lsn */
|
Atomic_relaxed<lsn_t> last_checkpoint_lsn;
|
||||||
lsn_t next_checkpoint_lsn;
|
lsn_t next_checkpoint_lsn;
|
||||||
/*!< next checkpoint lsn */
|
/*!< next checkpoint lsn */
|
||||||
ulint n_pending_checkpoint_writes;
|
ulint n_pending_checkpoint_writes;
|
||||||
|
@ -920,7 +920,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
|
|||||||
|
|
||||||
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
|
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
|
||||||
", flushed to " LSN_PF,
|
", flushed to " LSN_PF,
|
||||||
log_sys.last_checkpoint_lsn,
|
lsn_t{log_sys.last_checkpoint_lsn},
|
||||||
log_sys.get_flushed_lsn()));
|
log_sys.get_flushed_lsn()));
|
||||||
|
|
||||||
MONITOR_INC(MONITOR_NUM_CHECKPOINT);
|
MONITOR_INC(MONITOR_NUM_CHECKPOINT);
|
||||||
@ -1235,7 +1235,7 @@ log_print(
|
|||||||
lsn,
|
lsn,
|
||||||
log_sys.get_flushed_lsn(),
|
log_sys.get_flushed_lsn(),
|
||||||
pages_flushed,
|
pages_flushed,
|
||||||
log_sys.last_checkpoint_lsn);
|
lsn_t{log_sys.last_checkpoint_lsn});
|
||||||
|
|
||||||
current_time = time(NULL);
|
current_time = time(NULL);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user