mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-12227 Defer writes to the InnoDB temporary tablespace
The flushing of the InnoDB temporary tablespace is unnecessarily tied to the write-ahead redo logging and redo log checkpoints. Those mechanisms only need to be tied to the page writes of persistent tablespaces.
Let us simply omit any pages of temporary tables from buf_pool.flush_list. In this way, log checkpoints will never incur the 'collateral damage' of writing out changes to temporary tables that the checkpoint does not need.
After this change, pages of the temporary tablespace can only be written out by buf_flush_lists(n_pages,0) as part of LRU eviction. Hopefully, most of the time, that code will never be executed, and instead, the temporary pages will be evicted by buf_release_freed_page() without ever being written back to the temporary tablespace file.
This should improve the efficiency of the checkpoint flushing and of the buf_flush_page_cleaner thread.
Reviewed by: Vladislav Vaintroub
This commit is contained in:
@ -4,7 +4,7 @@ SET AUTO_INCREMENT_INCREMENT = 1;
|
|||||||
# MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
|
# MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
|
||||||
SET GLOBAL innodb_flush_sync=OFF;
|
SET GLOBAL innodb_flush_sync=OFF;
|
||||||
# For the server to hang, we must have pages for temporary tables
|
# For the server to hang, we must have pages for temporary tables
|
||||||
# (and this is only effective as long as MDEV-12227 is not fixed).
|
# (and the bug depended on MDEV-12227 not being fixed).
|
||||||
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
|
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
|
||||||
SET debug_dbug= '+d,ib_log_flush_ahead';
|
SET debug_dbug= '+d,ib_log_flush_ahead';
|
||||||
INSERT INTO t1 VALUES(NULL);
|
INSERT INTO t1 VALUES(NULL);
|
||||||
|
@ -12,7 +12,7 @@ SET AUTO_INCREMENT_INCREMENT = 1;
|
|||||||
--echo # MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
|
--echo # MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
|
||||||
SET GLOBAL innodb_flush_sync=OFF;
|
SET GLOBAL innodb_flush_sync=OFF;
|
||||||
--echo # For the server to hang, we must have pages for temporary tables
|
--echo # For the server to hang, we must have pages for temporary tables
|
||||||
--echo # (and this is only effective as long as MDEV-12227 is not fixed).
|
--echo # (and the bug depended on MDEV-12227 not being fixed).
|
||||||
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
|
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
|
||||||
SET debug_dbug= '+d,ib_log_flush_ahead';
|
SET debug_dbug= '+d,ib_log_flush_ahead';
|
||||||
|
|
||||||
|
@ -207,7 +207,7 @@ the common LRU list. That is, each manipulation of the common LRU
|
|||||||
list will result in the same manipulation of the unzip_LRU list.
|
list will result in the same manipulation of the unzip_LRU list.
|
||||||
|
|
||||||
The chain of modified blocks (buf_pool.flush_list) contains the blocks
|
The chain of modified blocks (buf_pool.flush_list) contains the blocks
|
||||||
holding file pages that have been modified in the memory
|
holding persistent file pages that have been modified in the memory
|
||||||
but not written to disk yet. The block with the oldest modification
|
but not written to disk yet. The block with the oldest modification
|
||||||
which has not yet been written to disk is at the end of the chain.
|
which has not yet been written to disk is at the end of the chain.
|
||||||
The access to this list is protected by buf_pool.flush_list_mutex.
|
The access to this list is protected by buf_pool.flush_list_mutex.
|
||||||
@ -1346,6 +1346,12 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fsp_is_system_temporary(block->page.id().space()))
|
||||||
|
{
|
||||||
|
ut_ad(block->page.oldest_modification() <= 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!block->page.ready_for_replace())
|
if (!block->page.ready_for_replace())
|
||||||
return block;
|
return block;
|
||||||
|
|
||||||
@ -1500,8 +1506,10 @@ void buf_pool_t::close()
|
|||||||
/* The buffer pool must be clean during normal shutdown.
|
/* The buffer pool must be clean during normal shutdown.
|
||||||
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
|
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
|
||||||
we may discard changes. */
|
we may discard changes. */
|
||||||
ut_ad(!bpage->oldest_modification() || srv_is_being_started ||
|
ut_d(const lsn_t oldest= bpage->oldest_modification();)
|
||||||
srv_fast_shutdown == 2);
|
ut_ad(!oldest || srv_is_being_started ||
|
||||||
|
srv_fast_shutdown == 2 ||
|
||||||
|
(oldest == 1 && fsp_is_system_temporary(bpage->id().space())));
|
||||||
|
|
||||||
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
|
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
|
||||||
buf_page_free_descriptor(bpage);
|
buf_page_free_descriptor(bpage);
|
||||||
@ -4349,6 +4357,7 @@ void buf_pool_t::validate()
|
|||||||
for (buf_page_t* b = UT_LIST_GET_FIRST(flush_list); b;
|
for (buf_page_t* b = UT_LIST_GET_FIRST(flush_list); b;
|
||||||
b = UT_LIST_GET_NEXT(list, b)) {
|
b = UT_LIST_GET_NEXT(list, b)) {
|
||||||
ut_ad(b->oldest_modification());
|
ut_ad(b->oldest_modification());
|
||||||
|
ut_ad(!fsp_is_system_temporary(b->id().space()));
|
||||||
n_flushing++;
|
n_flushing++;
|
||||||
|
|
||||||
switch (b->state()) {
|
switch (b->state()) {
|
||||||
|
@ -148,6 +148,7 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
|
|||||||
mysql_mutex_assert_not_owner(&buf_pool.mutex);
|
mysql_mutex_assert_not_owner(&buf_pool.mutex);
|
||||||
mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
|
mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
|
||||||
ut_ad(lsn);
|
ut_ad(lsn);
|
||||||
|
ut_ad(!fsp_is_system_temporary(block->page.id().space()));
|
||||||
|
|
||||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||||
block->page.set_oldest_modification(lsn);
|
block->page.set_oldest_modification(lsn);
|
||||||
@ -163,24 +164,27 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
|
|||||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Remove a block from buf_pool.flush_list */
|
||||||
|
static void buf_flush_remove_low(buf_page_t *bpage)
|
||||||
|
{
|
||||||
|
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
|
||||||
|
mysql_mutex_assert_owner(&buf_pool.mutex);
|
||||||
|
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
|
||||||
|
ut_ad(!bpage->oldest_modification());
|
||||||
|
buf_pool.flush_hp.adjust(bpage);
|
||||||
|
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
|
||||||
|
buf_pool.stat.flush_list_bytes -= bpage->physical_size();
|
||||||
|
#ifdef UNIV_DEBUG
|
||||||
|
buf_flush_validate_skip();
|
||||||
|
#endif /* UNIV_DEBUG */
|
||||||
|
}
|
||||||
|
|
||||||
/** Remove a block from the flush list of modified blocks.
|
/** Remove a block from the flush list of modified blocks.
|
||||||
@param[in,out] bpage block to be removed from the flush list */
|
@param[in,out] bpage block to be removed from the flush list */
|
||||||
static void buf_flush_remove(buf_page_t *bpage)
|
static void buf_flush_remove(buf_page_t *bpage)
|
||||||
{
|
{
|
||||||
mysql_mutex_assert_owner(&buf_pool.mutex);
|
bpage->clear_oldest_modification();
|
||||||
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
|
buf_flush_remove_low(bpage);
|
||||||
|
|
||||||
/* Important that we adjust the hazard pointer before removing
|
|
||||||
the bpage from flush list. */
|
|
||||||
buf_pool.flush_hp.adjust(bpage);
|
|
||||||
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
|
|
||||||
bpage->clear_oldest_modification();
|
|
||||||
|
|
||||||
buf_pool.stat.flush_list_bytes -= bpage->physical_size();
|
|
||||||
|
|
||||||
#ifdef UNIV_DEBUG
|
|
||||||
buf_flush_validate_skip();
|
|
||||||
#endif /* UNIV_DEBUG */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Remove all dirty pages belonging to a given tablespace when we are
|
/** Remove all dirty pages belonging to a given tablespace when we are
|
||||||
@ -280,6 +284,7 @@ buf_flush_relocate_on_flush_list(
|
|||||||
buf_page_t* prev;
|
buf_page_t* prev;
|
||||||
|
|
||||||
mysql_mutex_assert_owner(&buf_pool.mutex);
|
mysql_mutex_assert_owner(&buf_pool.mutex);
|
||||||
|
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
|
||||||
|
|
||||||
if (!bpage->oldest_modification()) {
|
if (!bpage->oldest_modification()) {
|
||||||
return;
|
return;
|
||||||
@ -356,11 +361,19 @@ void buf_page_write_complete(const IORequest &request)
|
|||||||
DBUG_PRINT("ib_buf", ("write page %u:%u",
|
DBUG_PRINT("ib_buf", ("write page %u:%u",
|
||||||
bpage->id().space(), bpage->id().page_no()));
|
bpage->id().space(), bpage->id().page_no()));
|
||||||
ut_ad(request.is_LRU() ? buf_pool.n_flush_LRU : buf_pool.n_flush_list);
|
ut_ad(request.is_LRU() ? buf_pool.n_flush_LRU : buf_pool.n_flush_list);
|
||||||
|
const bool temp= fsp_is_system_temporary(bpage->id().space());
|
||||||
|
|
||||||
mysql_mutex_lock(&buf_pool.mutex);
|
mysql_mutex_lock(&buf_pool.mutex);
|
||||||
bpage->set_io_fix(BUF_IO_NONE);
|
bpage->set_io_fix(BUF_IO_NONE);
|
||||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||||
buf_flush_remove(bpage);
|
ut_ad(!temp || bpage->oldest_modification() == 1);
|
||||||
|
bpage->clear_oldest_modification();
|
||||||
|
|
||||||
|
if (!temp)
|
||||||
|
buf_flush_remove_low(bpage);
|
||||||
|
else
|
||||||
|
ut_ad(request.is_LRU());
|
||||||
|
|
||||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||||
|
|
||||||
if (dblwr)
|
if (dblwr)
|
||||||
@ -787,8 +800,13 @@ static void buf_release_freed_page(buf_page_t *bpage)
|
|||||||
mysql_mutex_lock(&buf_pool.mutex);
|
mysql_mutex_lock(&buf_pool.mutex);
|
||||||
bpage->set_io_fix(BUF_IO_NONE);
|
bpage->set_io_fix(BUF_IO_NONE);
|
||||||
bpage->status= buf_page_t::NORMAL;
|
bpage->status= buf_page_t::NORMAL;
|
||||||
|
const bool temp= fsp_is_system_temporary(bpage->id().space());
|
||||||
|
ut_ad(!temp || uncompressed);
|
||||||
|
ut_ad(!temp || bpage->oldest_modification() == 1);
|
||||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||||
buf_flush_remove(bpage);
|
bpage->clear_oldest_modification();
|
||||||
|
if (!temp)
|
||||||
|
buf_flush_remove_low(bpage);
|
||||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||||
|
|
||||||
if (uncompressed)
|
if (uncompressed)
|
||||||
@ -1552,7 +1570,7 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
|
|||||||
const bool running= n_flush != 0;
|
const bool running= n_flush != 0;
|
||||||
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
|
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
|
||||||
while not holding buf_pool.flush_list_mutex */
|
while not holding buf_pool.flush_list_mutex */
|
||||||
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
|
if (running || (lsn && !UT_LIST_GET_LEN(buf_pool.flush_list)))
|
||||||
{
|
{
|
||||||
if (!running)
|
if (!running)
|
||||||
mysql_cond_broadcast(cond);
|
mysql_cond_broadcast(cond);
|
||||||
@ -2098,7 +2116,6 @@ furious_flush:
|
|||||||
|
|
||||||
if (!dirty_blocks)
|
if (!dirty_blocks)
|
||||||
{
|
{
|
||||||
unemployed2:
|
|
||||||
if (UNIV_UNLIKELY(lsn_limit != 0))
|
if (UNIV_UNLIKELY(lsn_limit != 0))
|
||||||
{
|
{
|
||||||
buf_flush_sync_lsn= 0;
|
buf_flush_sync_lsn= 0;
|
||||||
@ -2119,14 +2136,9 @@ unemployed:
|
|||||||
if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
|
if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
|
||||||
goto unemployed;
|
goto unemployed;
|
||||||
|
|
||||||
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
|
const lsn_t oldest_lsn= buf_pool.get_oldest_modified()
|
||||||
|
->oldest_modification();
|
||||||
#if 0 /* MDEV-12227 FIXME: enable this */
|
ut_ad(oldest_lsn);
|
||||||
ut_ad(oldest_lsn); /* dirty_blocks implies this */
|
|
||||||
#else
|
|
||||||
if (!oldest_lsn)
|
|
||||||
goto unemployed2;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
|
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
|
||||||
buf_flush_sync_lsn= 0;
|
buf_flush_sync_lsn= 0;
|
||||||
@ -2307,7 +2319,8 @@ void buf_flush_sync()
|
|||||||
struct Check {
|
struct Check {
|
||||||
void operator()(const buf_page_t* elem) const
|
void operator()(const buf_page_t* elem) const
|
||||||
{
|
{
|
||||||
ut_a(elem->oldest_modification());
|
ut_ad(elem->oldest_modification());
|
||||||
|
ut_ad(!fsp_is_system_temporary(elem->id().space()));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -940,6 +940,15 @@ public:
|
|||||||
/** Clear oldest_modification when removing from buf_pool.flush_list */
|
/** Clear oldest_modification when removing from buf_pool.flush_list */
|
||||||
inline void clear_oldest_modification();
|
inline void clear_oldest_modification();
|
||||||
|
|
||||||
|
/** Notify that a page in a temporary tablespace has been modified. */
|
||||||
|
void set_temp_modified()
|
||||||
|
{
|
||||||
|
ut_ad(fsp_is_system_temporary(id().space()));
|
||||||
|
ut_ad(state() == BUF_BLOCK_FILE_PAGE);
|
||||||
|
ut_ad(!oldest_modification());
|
||||||
|
oldest_modification_= 1;
|
||||||
|
}
|
||||||
|
|
||||||
/** Prepare to release a file page to buf_pool.free. */
|
/** Prepare to release a file page to buf_pool.free. */
|
||||||
void free_file_page()
|
void free_file_page()
|
||||||
{
|
{
|
||||||
@ -1552,18 +1561,22 @@ public:
|
|||||||
bool is_block_lock(const rw_lock_t *l) const
|
bool is_block_lock(const rw_lock_t *l) const
|
||||||
{ return is_block_field(static_cast<const void*>(l)); }
|
{ return is_block_field(static_cast<const void*>(l)); }
|
||||||
|
|
||||||
/**
|
/** @return the block that was made dirty the longest time ago */
|
||||||
@return the smallest oldest_modification lsn for any page
|
const buf_page_t *get_oldest_modified() const
|
||||||
@retval empty_lsn if all modified persistent pages have been flushed */
|
|
||||||
lsn_t get_oldest_modification(lsn_t empty_lsn)
|
|
||||||
{
|
{
|
||||||
mysql_mutex_assert_owner(&flush_list_mutex);
|
mysql_mutex_assert_owner(&flush_list_mutex);
|
||||||
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
|
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
|
||||||
#if 1 /* MDEV-12227 FIXME: remove this loop */
|
ut_ad(!bpage || !fsp_is_system_temporary(bpage->id().space()));
|
||||||
for (; bpage && fsp_is_system_temporary(bpage->id().space());
|
ut_ad(!bpage || bpage->oldest_modification());
|
||||||
bpage= UT_LIST_GET_PREV(list, bpage))
|
return bpage;
|
||||||
ut_ad(bpage->oldest_modification());
|
}
|
||||||
#endif
|
|
||||||
|
/**
|
||||||
|
@return the smallest oldest_modification lsn for any page
|
||||||
|
@retval empty_lsn if all modified persistent pages have been flushed */
|
||||||
|
lsn_t get_oldest_modification(lsn_t empty_lsn) const
|
||||||
|
{
|
||||||
|
const buf_page_t *bpage= get_oldest_modified();
|
||||||
return bpage ? bpage->oldest_modification() : empty_lsn;
|
return bpage ? bpage->oldest_modification() : empty_lsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,10 +65,12 @@ buf_flush_note_modification(
|
|||||||
|
|
||||||
const lsn_t oldest_modification = block->page.oldest_modification();
|
const lsn_t oldest_modification = block->page.oldest_modification();
|
||||||
|
|
||||||
if (!oldest_modification) {
|
if (oldest_modification) {
|
||||||
|
ut_ad(oldest_modification <= start_lsn);
|
||||||
|
} else if (!fsp_is_system_temporary(block->page.id().space())) {
|
||||||
buf_flush_insert_into_flush_list(block, start_lsn);
|
buf_flush_insert_into_flush_list(block, start_lsn);
|
||||||
} else {
|
} else {
|
||||||
ut_ad(oldest_modification <= start_lsn);
|
block->page.set_temp_modified();
|
||||||
}
|
}
|
||||||
|
|
||||||
srv_stats.buf_pool_write_requests.inc();
|
srv_stats.buf_pool_write_requests.inc();
|
||||||
|
Reference in New Issue
Block a user