mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MDEV-23855: Remove fil_system.LRU and reduce fil_system.mutex contention
Also fixes MDEV-23929: innodb_flush_neighbors is not being ignored for system tablespace on SSD.

When the maximum configured number of open files is exceeded, InnoDB will close data files. We used to maintain a fil_system.LRU list and a counter fil_node_t::n_pending to achieve this, at the huge cost of multiple fil_system.mutex operations per I/O operation.

fil_node_open_file_low(): Implement a FIFO replacement policy: The last opened file will be moved to the end of fil_system.space_list, and files will be closed from the start of the list. However, we will not move tablespaces in fil_system.space_list while i_s_tablespaces_encryption_fill_table() is executing (producing output for INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION) because it may cause information about some tablespaces to go missing. We also avoid this in mariabackup --backup because datafiles_iter_next() assumes that the ordering is not changed.

IORequest: Fold more parameters into IORequest::type.

fil_space_t::io(): Replaces fil_io().

fil_space_t::flush(): Replaces fil_flush().

OS_AIO_IBUF: Remove. We will always issue synchronous reads of the change buffer pages in buf_read_page_low(). We will always ignore some errors for background reads. This should reduce fil_system.mutex contention a little.

fil_node_t::complete_write(): Replaces fil_node_t::complete_io(). On both read and write completion, fil_space_t::release_for_io() will have to be called.

fil_space_t::io(): Do not acquire fil_system.mutex in the normal code path.

xb_delta_open_matching_space(): Do not try to open the system tablespace which was already opened. This fixes a file sharing violation in mariabackup --prepare --incremental.

Reviewed by: Vladislav Vaintroub
This commit is contained in:
@@ -93,7 +93,6 @@ xb_fil_node_close_file(
|
||||
mutex_enter(&fil_system.mutex);
|
||||
|
||||
ut_ad(node);
|
||||
ut_a(node->n_pending == 0);
|
||||
ut_a(node->n_pending_flushes == 0);
|
||||
ut_a(!node->being_extended);
|
||||
|
||||
@@ -108,20 +107,10 @@ xb_fil_node_close_file(
|
||||
ut_a(ret);
|
||||
|
||||
node->handle = OS_FILE_CLOSED;
|
||||
mutex_exit(&fil_system.mutex);
|
||||
|
||||
ut_a(fil_system.n_open > 0);
|
||||
fil_system.n_open--;
|
||||
|
||||
if (node->space->purpose == FIL_TYPE_TABLESPACE &&
|
||||
fil_is_user_tablespace_id(node->space->id)) {
|
||||
|
||||
ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
|
||||
|
||||
/* The node is in the LRU list, remove it */
|
||||
UT_LIST_REMOVE(fil_system.LRU, node);
|
||||
}
|
||||
|
||||
mutex_exit(&fil_system.mutex);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
@@ -180,18 +169,8 @@ xb_fil_cur_open(
|
||||
|
||||
return(XB_FIL_CUR_SKIP);
|
||||
}
|
||||
mutex_enter(&fil_system.mutex);
|
||||
|
||||
fil_system.n_open++;
|
||||
|
||||
if (node->space->purpose == FIL_TYPE_TABLESPACE &&
|
||||
fil_is_user_tablespace_id(node->space->id)) {
|
||||
|
||||
/* Put the node to the LRU list */
|
||||
UT_LIST_ADD_FIRST(fil_system.LRU, node);
|
||||
}
|
||||
|
||||
mutex_exit(&fil_system.mutex);
|
||||
}
|
||||
|
||||
ut_ad(node->is_open());
|
||||
@@ -427,7 +406,7 @@ xb_fil_cur_read(
|
||||
retry_count = 10;
|
||||
ret = XB_FIL_CUR_SUCCESS;
|
||||
|
||||
fil_space_t *space = fil_space_acquire_for_io(cursor->space_id);
|
||||
fil_space_t *space = fil_space_t::get_for_io(cursor->space_id);
|
||||
|
||||
if (!space) {
|
||||
return XB_FIL_CUR_ERROR;
|
||||
|
@@ -3011,6 +3011,7 @@ void
|
||||
xb_fil_io_init()
|
||||
{
|
||||
fil_system.create(srv_file_per_table ? 50000 : 5000);
|
||||
fil_system.freeze_space_list = 1;
|
||||
fil_system.space_id_reuse_warned = true;
|
||||
}
|
||||
|
||||
@@ -3087,24 +3088,16 @@ xb_load_single_table_tablespace(
|
||||
bool is_empty_file = file->exists() && file->is_empty_file();
|
||||
|
||||
if (err == DB_SUCCESS && file->space_id() != SRV_TMP_SPACE_ID) {
|
||||
os_offset_t node_size = os_file_get_size(file->handle());
|
||||
os_offset_t n_pages;
|
||||
|
||||
ut_a(node_size != (os_offset_t) -1);
|
||||
|
||||
n_pages = node_size / fil_space_t::physical_size(file->flags());
|
||||
|
||||
space = fil_space_create(
|
||||
space = fil_space_t::create(
|
||||
name, file->space_id(), file->flags(),
|
||||
FIL_TYPE_TABLESPACE, NULL/* TODO: crypt_data */);
|
||||
|
||||
ut_a(space != NULL);
|
||||
|
||||
space->add(file->filepath(), OS_FILE_CLOSED, uint32_t(n_pages),
|
||||
false, false);
|
||||
space->add(file->filepath(), OS_FILE_CLOSED, 0, false, false);
|
||||
/* by opening the tablespace we are forcing node and space objects
|
||||
in the cache to be populated with fields from space header */
|
||||
space->open();
|
||||
space->get_size();
|
||||
|
||||
if (srv_operation == SRV_OPERATION_RESTORE_DELTA
|
||||
|| xb_close_files) {
|
||||
@@ -3406,19 +3399,6 @@ xb_load_tablespaces()
|
||||
return(DB_SUCCESS);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Initialize the tablespace memory cache and populate it by scanning for and
|
||||
opening data files.
|
||||
@returns DB_SUCCESS or error code.*/
|
||||
static
|
||||
dberr_t
|
||||
xb_data_files_init()
|
||||
{
|
||||
xb_fil_io_init();
|
||||
|
||||
return(xb_load_tablespaces());
|
||||
}
|
||||
|
||||
/** Destroy the tablespace memory cache. */
|
||||
static void xb_data_files_close()
|
||||
{
|
||||
@@ -4607,6 +4587,22 @@ xb_delta_open_matching_space(
|
||||
return file;
|
||||
}
|
||||
|
||||
if (!info.space_id && fil_system.sys_space) {
|
||||
fil_node_t *node
|
||||
= UT_LIST_GET_FIRST(fil_system.sys_space->chain);
|
||||
for (; node; node = UT_LIST_GET_NEXT(chain, node)) {
|
||||
if (!strcmp(node->name, real_name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (node && node->handle != OS_FILE_CLOSED) {
|
||||
*success = true;
|
||||
return node->handle;
|
||||
}
|
||||
msg("mariabackup: Cannot find file %s\n", real_name);
|
||||
return OS_FILE_CLOSED;
|
||||
}
|
||||
|
||||
log_mutex_enter();
|
||||
if (!fil_is_user_tablespace_id(info.space_id)) {
|
||||
found:
|
||||
@@ -4704,7 +4700,7 @@ exit:
|
||||
ut_ad(fil_space_t::zip_size(flags) == info.zip_size);
|
||||
ut_ad(fil_space_t::physical_size(flags) == info.page_size);
|
||||
|
||||
if (fil_space_create(dest_space_name, info.space_id, flags,
|
||||
if (fil_space_t::create(dest_space_name, info.space_id, flags,
|
||||
FIL_TYPE_TABLESPACE, 0)) {
|
||||
*success = xb_space_create_file(real_name, info.space_id,
|
||||
flags, &file);
|
||||
@@ -4925,7 +4921,7 @@ xtrabackup_apply_delta(
|
||||
os_file_close(src_file);
|
||||
os_file_delete(0,src_path);
|
||||
}
|
||||
if (dst_file != OS_FILE_CLOSED)
|
||||
if (dst_file != OS_FILE_CLOSED && info.space_id)
|
||||
os_file_close(dst_file);
|
||||
return TRUE;
|
||||
|
||||
@@ -4933,7 +4929,7 @@ error:
|
||||
aligned_free(incremental_buffer);
|
||||
if (src_file != OS_FILE_CLOSED)
|
||||
os_file_close(src_file);
|
||||
if (dst_file != OS_FILE_CLOSED)
|
||||
if (dst_file != OS_FILE_CLOSED && info.space_id)
|
||||
os_file_close(dst_file);
|
||||
msg("Error: xtrabackup_apply_delta(): "
|
||||
"failed to apply %s to %s.\n", src_path, dst_path);
|
||||
@@ -5387,8 +5383,8 @@ static bool xtrabackup_prepare_func(char** argv)
|
||||
srv_allow_writes_event = os_event_create(0);
|
||||
os_event_set(srv_allow_writes_event);
|
||||
#endif
|
||||
dberr_t err = xb_data_files_init();
|
||||
if (err != DB_SUCCESS) {
|
||||
xb_fil_io_init();
|
||||
if (dberr_t err = xb_load_tablespaces()) {
|
||||
msg("mariabackup: error: xb_data_files_init() failed "
|
||||
"with error %s\n", ut_strerr(err));
|
||||
goto error_cleanup;
|
||||
@@ -5396,7 +5392,8 @@ static bool xtrabackup_prepare_func(char** argv)
|
||||
|
||||
inc_dir_tables_hash.create(1000);
|
||||
|
||||
ok = xtrabackup_apply_deltas();
|
||||
ok = fil_system.sys_space->open(false)
|
||||
&& xtrabackup_apply_deltas();
|
||||
|
||||
xb_data_files_close();
|
||||
|
||||
@@ -5426,6 +5423,8 @@ static bool xtrabackup_prepare_func(char** argv)
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
fil_system.freeze_space_list = 0;
|
||||
|
||||
/* increase IO threads */
|
||||
if (srv_n_file_io_threads < 10) {
|
||||
srv_n_read_io_threads = 4;
|
||||
@@ -5447,6 +5446,8 @@ static bool xtrabackup_prepare_func(char** argv)
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
ut_ad(!fil_system.freeze_space_list);
|
||||
|
||||
if (ok) {
|
||||
msg("Last binlog file %s, position %lld",
|
||||
trx_sys.recovered_binlog_filename,
|
||||
|
@@ -29,6 +29,7 @@ create table t1(a int not null primary key, b char(200)) engine=innodb;
|
||||
--source include/wait_condition.inc
|
||||
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
|
||||
--echo # Success!
|
||||
@@ -41,6 +42,7 @@ SET GLOBAL innodb_encrypt_tables = off;
|
||||
--let $wait_condition=SELECT COUNT(*) = $tables_count FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0 AND ROTATING_OR_FLUSHING = 0;
|
||||
--source include/wait_condition.inc
|
||||
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
|
||||
@@ -51,6 +53,7 @@ SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_
|
||||
--let $restart_parameters=--skip-file-key-management --innodb-encrypt-tables=OFF --innodb-encryption-threads=0 --innodb-tablespaces-encryption
|
||||
-- source include/restart_mysqld.inc
|
||||
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
|
||||
|
@@ -26,6 +26,7 @@ let $restart_parameters= --innodb_encryption_threads=5 --innodb_encryption_rotat
|
||||
--source include/wait_condition.inc
|
||||
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
|
||||
--echo # Restart the server with innodb_encryption_rotate_key_age= 0
|
||||
@@ -45,6 +46,7 @@ create table t4 (f1 int not null)engine=innodb encrypted=NO;
|
||||
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
|
||||
--echo # Disable encryption when innodb_encryption_rotate_key_age is 0
|
||||
@@ -57,6 +59,7 @@ set global innodb_encrypt_tables = OFF;
|
||||
--let $wait_condition=SELECT COUNT(*) >= $tables_count FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0 AND ROTATING_OR_FLUSHING = 0;
|
||||
--source include/wait_condition.inc
|
||||
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
--echo # Display only encrypted create tables (t3)
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
@@ -73,11 +76,13 @@ set global innodb_encrypt_tables = ON;
|
||||
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
--echo # Display only unencrypted create tables (t4)
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
|
||||
--let $restart_parameters=
|
||||
-- source include/restart_mysqld.inc
|
||||
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
|
||||
--sorted_result
|
||||
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
|
||||
DROP TABLE t4, t3, t2, t1;
|
||||
|
@@ -1,4 +1,4 @@
|
||||
call mtr.add_suppression("InnoDB: innodb_open_files=13 is exceeded");
|
||||
call mtr.add_suppression("InnoDB: innodb_open_files=.* is exceeded");
|
||||
SET @save_tdc= @@GLOBAL.table_definition_cache;
|
||||
SET @save_toc= @@GLOBAL.table_open_cache;
|
||||
SET GLOBAL table_definition_cache= 400;
|
||||
|
@@ -32,18 +32,6 @@ commit;
|
||||
set autocommit=1;
|
||||
|
||||
|
||||
let $success= `SELECT variable_value FROM information_schema.global_status WHERE variable_name = 'innodb_num_page_compressed_trim_op'`;
|
||||
|
||||
if (!$success) {
|
||||
--disable_query_log
|
||||
--disable_result_log
|
||||
DROP PROCEDURE innodb_insert_proc;
|
||||
DROP TABLE innodb_page_compressed;
|
||||
--enable_query_log
|
||||
--enable_result_log
|
||||
--skip "Test requires TRIM";
|
||||
}
|
||||
|
||||
DROP PROCEDURE innodb_insert_proc;
|
||||
DROP TABLE innodb_page_compressed;
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
# This test is slow on buildbot.
|
||||
--source include/big_test.inc
|
||||
|
||||
call mtr.add_suppression("InnoDB: innodb_open_files=13 is exceeded");
|
||||
call mtr.add_suppression("InnoDB: innodb_open_files=.* is exceeded");
|
||||
|
||||
SET @save_tdc= @@GLOBAL.table_definition_cache;
|
||||
SET @save_toc= @@GLOBAL.table_open_cache;
|
||||
|
@@ -1,3 +1,4 @@
|
||||
|
||||
# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2014, 2020, MariaDB Corporation.
|
||||
#
|
||||
@@ -186,7 +187,6 @@ SET(INNOBASE_SOURCES
|
||||
include/mtr0mtr.h
|
||||
include/mtr0mtr.ic
|
||||
include/mtr0types.h
|
||||
include/os0api.h
|
||||
include/os0event.h
|
||||
include/os0file.h
|
||||
include/os0file.ic
|
||||
|
@@ -3304,22 +3304,35 @@ upd_sys:
|
||||
|
||||
/**
|
||||
Prefetch siblings of the leaf for the pessimistic operation.
|
||||
@param block leaf page */
|
||||
static void btr_cur_prefetch_siblings(const buf_block_t* block)
|
||||
@param block leaf page
|
||||
@param index index of the page */
|
||||
static void btr_cur_prefetch_siblings(const buf_block_t *block,
|
||||
const dict_index_t *index)
|
||||
{
|
||||
const page_t *page= block->frame;
|
||||
ut_ad(page_is_leaf(page));
|
||||
ut_ad(page_is_leaf(block->frame));
|
||||
|
||||
if (index->is_ibuf())
|
||||
return;
|
||||
|
||||
const page_t *page= block->frame;
|
||||
uint32_t prev= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_PREV));
|
||||
uint32_t next= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT));
|
||||
|
||||
if (prev != FIL_NULL)
|
||||
buf_read_page_background(page_id_t(block->page.id().space(), prev),
|
||||
{
|
||||
ut_a(index->table->space->acquire_for_io());
|
||||
buf_read_page_background(index->table->space,
|
||||
page_id_t(block->page.id().space(), prev),
|
||||
block->zip_size(), false);
|
||||
}
|
||||
if (next != FIL_NULL)
|
||||
buf_read_page_background(page_id_t(block->page.id().space(), next),
|
||||
{
|
||||
ut_a(index->table->space->acquire_for_io());
|
||||
buf_read_page_background(index->table->space,
|
||||
page_id_t(block->page.id().space(), next),
|
||||
block->zip_size(), false);
|
||||
}
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Tries to perform an insert to a page in an index tree, next to cursor.
|
||||
@@ -3436,8 +3449,8 @@ fail:
|
||||
|
||||
/* prefetch siblings of the leaf for the pessimistic
|
||||
operation, if the page is leaf. */
|
||||
if (page_is_leaf(page) && !index->is_ibuf()) {
|
||||
btr_cur_prefetch_siblings(block);
|
||||
if (page_is_leaf(page)) {
|
||||
btr_cur_prefetch_siblings(block, index);
|
||||
}
|
||||
fail_err:
|
||||
|
||||
@@ -4575,7 +4588,7 @@ any_extern:
|
||||
|
||||
/* prefetch siblings of the leaf for the pessimistic
|
||||
operation. */
|
||||
btr_cur_prefetch_siblings(block);
|
||||
btr_cur_prefetch_siblings(block, index);
|
||||
|
||||
return(DB_OVERFLOW);
|
||||
}
|
||||
@@ -4766,10 +4779,10 @@ func_exit:
|
||||
}
|
||||
}
|
||||
|
||||
if (err != DB_SUCCESS && !index->is_ibuf()) {
|
||||
if (err != DB_SUCCESS) {
|
||||
/* prefetch siblings of the leaf for the pessimistic
|
||||
operation. */
|
||||
btr_cur_prefetch_siblings(block);
|
||||
btr_cur_prefetch_siblings(block, index);
|
||||
}
|
||||
|
||||
return(err);
|
||||
@@ -5481,7 +5494,7 @@ btr_cur_optimistic_delete_func(
|
||||
if (!no_compress_needed) {
|
||||
/* prefetch siblings of the leaf for the pessimistic
|
||||
operation. */
|
||||
btr_cur_prefetch_siblings(block);
|
||||
btr_cur_prefetch_siblings(block, cursor->index);
|
||||
goto func_exit;
|
||||
}
|
||||
|
||||
|
@@ -2768,7 +2768,7 @@ buf_zip_decompress(
|
||||
ulint size = page_zip_get_size(&block->page.zip);
|
||||
/* The tablespace will not be found if this function is called
|
||||
during IMPORT. */
|
||||
fil_space_t* space= fil_space_acquire_for_io(block->page.id().space());
|
||||
fil_space_t* space= fil_space_t::get_for_io(block->page.id().space());
|
||||
const unsigned key_version = mach_read_from_4(
|
||||
frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
|
||||
fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL;
|
||||
@@ -3034,10 +3034,9 @@ buf_page_get_low(
|
||||
/* fall through */
|
||||
case BUF_GET:
|
||||
case BUF_GET_IF_IN_POOL_OR_WATCH:
|
||||
fil_space_t* s = fil_space_acquire_for_io(page_id.space());
|
||||
fil_space_t* s = fil_space_get(page_id.space());
|
||||
ut_ad(s);
|
||||
ut_ad(s->zip_size() == zip_size);
|
||||
s->release_for_io();
|
||||
}
|
||||
#endif /* UNIV_DEBUG */
|
||||
|
||||
@@ -3107,7 +3106,7 @@ lookup:
|
||||
}
|
||||
|
||||
/* The call path is buf_read_page() ->
|
||||
buf_read_page_low() (fil_io()) ->
|
||||
buf_read_page_low() (fil_space_t::io()) ->
|
||||
buf_page_read_complete() ->
|
||||
buf_decrypt_after_read(). Here fil_space_t* is used
|
||||
and we decrypt -> buf_page_check_corrupt() where page
|
||||
@@ -3161,8 +3160,7 @@ lookup:
|
||||
asserting. */
|
||||
if (page_id.space() == TRX_SYS_SPACE) {
|
||||
} else if (page_id.space() == SRV_TMP_SPACE_ID) {
|
||||
} else if (fil_space_t* space
|
||||
= fil_space_acquire_for_io(
|
||||
} else if (fil_space_t* space= fil_space_t::get_for_io(
|
||||
page_id.space())) {
|
||||
bool set = dict_set_corrupted_by_space(space);
|
||||
space->release_for_io();
|
||||
@@ -3376,8 +3374,8 @@ re_evict:
|
||||
if (mode != BUF_GET_IF_IN_POOL
|
||||
&& mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
|
||||
} else if (!ibuf_debug) {
|
||||
} else if (fil_space_t* space =
|
||||
fil_space_acquire_for_io(page_id.space())) {
|
||||
} else if (fil_space_t* space
|
||||
= fil_space_t::get_for_io(page_id.space())) {
|
||||
/* Try to evict the block from the buffer pool, to use the
|
||||
insert buffer (change buffer) as much as possible. */
|
||||
|
||||
@@ -4869,17 +4867,4 @@ std::ostream& operator<<(std::ostream &out, const page_id_t page_id)
|
||||
<< ", page number=" << page_id.page_no() << "]";
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
Calculate the length of trim (punch_hole) operation.
|
||||
@param[in] bpage Page control block
|
||||
@param[in] write_length Write length
|
||||
@return length of the trim or zero. */
|
||||
ulint
|
||||
buf_page_get_trim_length(
|
||||
const buf_page_t* bpage,
|
||||
ulint write_length)
|
||||
{
|
||||
return bpage->physical_size() - write_length;
|
||||
}
|
||||
#endif /* !UNIV_INNOCHECKSUM */
|
||||
|
@@ -125,7 +125,8 @@ too_small:
|
||||
|
||||
byte *fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG +
|
||||
trx_sys_block->frame;
|
||||
for (uint32_t prev_page_no= 0, i= 0; i < 2 * size + FSP_EXTENT_SIZE / 2; i++)
|
||||
for (uint32_t prev_page_no= 0, i= 0, extent_size= FSP_EXTENT_SIZE;
|
||||
i < 2 * size + extent_size / 2; i++)
|
||||
{
|
||||
buf_block_t *new_block= fseg_alloc_free_page(fseg_header, prev_page_no + 1,
|
||||
FSP_UP, &mtr);
|
||||
@@ -362,15 +363,13 @@ void buf_dblwr_t::recover()
|
||||
continue;
|
||||
}
|
||||
|
||||
fil_space_t* space= fil_space_acquire_for_io(space_id);
|
||||
fil_space_t *space= fil_space_t::get_for_io(space_id);
|
||||
|
||||
if (!space)
|
||||
/* The tablespace that this page once belonged to does not exist */
|
||||
continue;
|
||||
|
||||
fil_space_open_if_needed(space);
|
||||
|
||||
if (UNIV_UNLIKELY(page_no >= space->size))
|
||||
if (UNIV_UNLIKELY(page_no >= space->get_size()))
|
||||
{
|
||||
/* Do not report the warning for undo tablespaces, because they
|
||||
can be truncated in place. */
|
||||
@@ -385,7 +384,6 @@ next_page:
|
||||
}
|
||||
|
||||
const ulint physical_size= space->physical_size();
|
||||
const ulint zip_size= space->zip_size();
|
||||
ut_ad(!buf_is_zeroes(span<const byte>(page, physical_size)));
|
||||
|
||||
/* We want to ensure that for partial reads the unread portion of
|
||||
@@ -393,18 +391,15 @@ next_page:
|
||||
memset(read_buf, 0x0, physical_size);
|
||||
|
||||
/* Read in the actual page from the file */
|
||||
fil_io_t fio= fil_io(IORequest(IORequest::READ | IORequest::DBLWR_RECOVER),
|
||||
true, page_id, zip_size,
|
||||
0, physical_size, read_buf, nullptr);
|
||||
fil_io_t fio= space->io(IORequest(IORequest::DBLWR_RECOVER),
|
||||
os_offset_t{page_no} * physical_size,
|
||||
physical_size, read_buf);
|
||||
|
||||
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS))
|
||||
ib::warn() << "Double write buffer recovery: " << page_id
|
||||
<< " (tablespace '" << space->name
|
||||
<< "') read failed with error: " << fio.err;
|
||||
|
||||
if (fio.node)
|
||||
fio.node->space->release_for_io();
|
||||
|
||||
if (buf_is_zeroes(span<const byte>(read_buf, physical_size)))
|
||||
{
|
||||
/* We will check if the copy in the doublewrite buffer is
|
||||
@@ -425,18 +420,16 @@ next_page:
|
||||
|
||||
/* Write the good page from the doublewrite buffer to the intended
|
||||
position. */
|
||||
fio= fil_io(IORequestWrite, true, page_id, zip_size, 0, physical_size,
|
||||
page, nullptr);
|
||||
space->reacquire_for_io();
|
||||
fio= space->io(IORequestWrite,
|
||||
os_offset_t{page_id.page_no()} * physical_size,
|
||||
physical_size, page);
|
||||
|
||||
if (fio.node)
|
||||
{
|
||||
ut_ad(fio.err == DB_SUCCESS);
|
||||
if (fio.err == DB_SUCCESS)
|
||||
ib::info() << "Recovered page " << page_id << " to '" << fio.node->name
|
||||
<< "' from the doublewrite buffer.";
|
||||
fio.node->space->release_for_io();
|
||||
goto next_page;
|
||||
}
|
||||
}
|
||||
|
||||
recv_sys.dblwr.pages.clear();
|
||||
fil_flush_file_spaces();
|
||||
@@ -513,7 +506,7 @@ static void buf_dblwr_check_page_lsn(const page_t* page, const fil_space_t& s)
|
||||
|
||||
static void buf_dblwr_check_page_lsn(const buf_page_t &b, const byte *page)
|
||||
{
|
||||
if (fil_space_t *space= fil_space_acquire_for_io(b.id().space()))
|
||||
if (fil_space_t *space= fil_space_t::get_for_io(b.id().space()))
|
||||
{
|
||||
buf_dblwr_check_page_lsn(page, *space);
|
||||
space->release_for_io();
|
||||
@@ -577,7 +570,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
|
||||
#ifdef UNIV_DEBUG
|
||||
for (ulint len2= 0, i= 0; i < old_first_free; len2 += srv_page_size, i++)
|
||||
{
|
||||
buf_page_t *bpage= buf_block_arr[i].bpage;
|
||||
buf_page_t *bpage= buf_block_arr[i].request.bpage;
|
||||
|
||||
if (bpage->zip.data)
|
||||
/* No simple validate for ROW_FORMAT=COMPRESSED pages exists. */
|
||||
@@ -590,18 +583,22 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
|
||||
}
|
||||
#endif /* UNIV_DEBUG */
|
||||
/* Write out the first block of the doublewrite buffer */
|
||||
fil_io_t fio= fil_io(IORequestWrite, true, block1, 0, 0,
|
||||
std::min(size, old_first_free) << srv_page_size_shift,
|
||||
write_buf, nullptr);
|
||||
fio.node->space->release_for_io();
|
||||
ut_a(fil_system.sys_space->acquire_for_io());
|
||||
fil_system.sys_space->io(IORequestWrite,
|
||||
os_offset_t{block1.page_no()} <<
|
||||
srv_page_size_shift,
|
||||
std::min(size, old_first_free) <<
|
||||
srv_page_size_shift, write_buf);
|
||||
|
||||
if (old_first_free > size)
|
||||
{
|
||||
/* Write out the second block of the doublewrite buffer. */
|
||||
fio= fil_io(IORequestWrite, true, block2, 0, 0,
|
||||
ut_a(fil_system.sys_space->acquire_for_io());
|
||||
fil_system.sys_space->io(IORequestWrite,
|
||||
os_offset_t{block2.page_no()} <<
|
||||
srv_page_size_shift,
|
||||
(old_first_free - size) << srv_page_size_shift,
|
||||
write_buf + (size << srv_page_size_shift), nullptr);
|
||||
fio.node->space->release_for_io();
|
||||
write_buf + (size << srv_page_size_shift));
|
||||
}
|
||||
|
||||
/* increment the doublewrite flushed pages counter */
|
||||
@@ -609,7 +606,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
|
||||
srv_stats.dblwr_writes.inc();
|
||||
|
||||
/* Now flush the doublewrite buffer data to disk */
|
||||
fil_flush(TRX_SYS_SPACE);
|
||||
fil_system.sys_space->flush();
|
||||
|
||||
/* We know that the writes have been flushed to disk now
|
||||
and in recovery we will find them in the doublewrite buffer
|
||||
@@ -629,8 +626,8 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
|
||||
for (ulint i= 0; i < old_first_free; i++)
|
||||
{
|
||||
auto e= buf_block_arr[i];
|
||||
buf_page_t* bpage= e.bpage;
|
||||
ut_a(bpage->in_file());
|
||||
buf_page_t* bpage= e.request.bpage;
|
||||
ut_ad(bpage->in_file());
|
||||
|
||||
/* We request frame here to get correct buffer in case of
|
||||
encryption and/or page compression */
|
||||
@@ -650,8 +647,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
|
||||
ut_d(buf_dblwr_check_page_lsn(*bpage, static_cast<const byte*>(frame)));
|
||||
}
|
||||
|
||||
fil_io(IORequest(IORequest::WRITE, bpage, e.lru), false,
|
||||
bpage->id(), bpage->zip_size(), 0, e_size, frame, bpage);
|
||||
e.space->io(e.request, bpage->physical_offset(), e_size, frame, bpage);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -680,12 +676,20 @@ void buf_dblwr_t::flush_buffered_writes()
|
||||
|
||||
/** Schedule a page write. If the doublewrite memory buffer is full,
|
||||
flush_buffered_writes() will be invoked to make space.
|
||||
@param bpage buffer pool page to be written
|
||||
@param lru true=buf_pool.LRU; false=buf_pool.flush_list
|
||||
@param space tablespace
|
||||
@param request asynchronous write request
|
||||
@param size payload size in bytes */
|
||||
void buf_dblwr_t::add_to_batch(buf_page_t *bpage, bool lru, size_t size)
|
||||
void buf_dblwr_t::add_to_batch(fil_space_t *space, const IORequest &request,
|
||||
size_t size)
|
||||
{
|
||||
ut_ad(bpage->in_file());
|
||||
ut_ad(request.is_async());
|
||||
ut_ad(request.is_write());
|
||||
ut_ad(request.bpage);
|
||||
ut_ad(request.bpage->in_file());
|
||||
ut_ad(space->id == request.bpage->id().space());
|
||||
ut_ad(space->pending_io());
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
const ulint buf_size= 2 * block_size();
|
||||
|
||||
mysql_mutex_lock(&mutex);
|
||||
@@ -707,13 +711,13 @@ void buf_dblwr_t::add_to_batch(buf_page_t *bpage, bool lru, size_t size)
|
||||
|
||||
/* We request frame here to get correct buffer in case of
|
||||
encryption and/or page compression */
|
||||
void *frame= buf_page_get_frame(bpage);
|
||||
void *frame= buf_page_get_frame(request.bpage);
|
||||
|
||||
memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(p, frame, size);
|
||||
ut_ad(!bpage->zip_size() || bpage->zip_size() == size);
|
||||
ut_ad(!request.bpage->zip_size() || request.bpage->zip_size() == size);
|
||||
ut_ad(reserved == first_free);
|
||||
ut_ad(reserved < buf_size);
|
||||
buf_block_arr[first_free++]= { bpage, lru, size };
|
||||
new (buf_block_arr + first_free++) element{space, request, size};
|
||||
reserved= first_free;
|
||||
|
||||
if (first_free != buf_size || !flush_buffered_writes(buf_size / 2))
|
||||
|
@@ -626,6 +626,14 @@ buf_load()
|
||||
so all pages from a given tablespace are consecutive. */
|
||||
ulint cur_space_id = dump[0].space();
|
||||
fil_space_t* space = fil_space_acquire_silent(cur_space_id);
|
||||
if (space) {
|
||||
bool ok = space->acquire_for_io();
|
||||
space->release();
|
||||
if (!ok) {
|
||||
space = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
ulint zip_size = space ? space->zip_size() : 0;
|
||||
|
||||
PSI_stage_progress* pfs_stage_progress __attribute__((unused))
|
||||
@@ -644,22 +652,32 @@ buf_load()
|
||||
}
|
||||
|
||||
if (this_space_id != cur_space_id) {
|
||||
if (space != NULL) {
|
||||
space->release();
|
||||
if (space) {
|
||||
space->release_for_io();
|
||||
}
|
||||
|
||||
cur_space_id = this_space_id;
|
||||
space = fil_space_acquire_silent(cur_space_id);
|
||||
|
||||
if (space != NULL) {
|
||||
zip_size = space->zip_size();
|
||||
if (!space) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool ok = space->acquire_for_io();
|
||||
space->release();
|
||||
|
||||
if (!ok) {
|
||||
space = nullptr;
|
||||
continue;
|
||||
}
|
||||
|
||||
zip_size = space->zip_size();
|
||||
}
|
||||
|
||||
/* JAN: TODO: As we use background page read below,
|
||||
if the tablespace is encrypted we cannot use it. */
|
||||
if (space == NULL ||
|
||||
(space && space->crypt_data &&
|
||||
if (!space || dump[i].page_no() >= space->get_size() ||
|
||||
(space->crypt_data &&
|
||||
space->crypt_data->encryption != FIL_ENCRYPTION_OFF &&
|
||||
space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) {
|
||||
continue;
|
||||
@@ -671,11 +689,12 @@ buf_load()
|
||||
continue;
|
||||
}
|
||||
|
||||
buf_read_page_background(dump[i], zip_size, true);
|
||||
space->reacquire_for_io();
|
||||
buf_read_page_background(space, dump[i], zip_size, true);
|
||||
|
||||
if (buf_load_abort_flag) {
|
||||
if (space != NULL) {
|
||||
space->release();
|
||||
if (space) {
|
||||
space->release_for_io();
|
||||
}
|
||||
buf_load_abort_flag = false;
|
||||
ut_free(dump);
|
||||
@@ -702,8 +721,8 @@ buf_load()
|
||||
#endif
|
||||
}
|
||||
|
||||
if (space != NULL) {
|
||||
space->release();
|
||||
if (space) {
|
||||
space->release_for_io();
|
||||
}
|
||||
|
||||
ut_free(dump);
|
||||
|
@@ -782,6 +782,11 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
||||
{
|
||||
ut_ad(bpage->in_file());
|
||||
ut_ad(bpage->ready_for_flush());
|
||||
ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
|
||||
(space == fil_system.temp_space));
|
||||
ut_ad(space->purpose == FIL_TYPE_TABLESPACE ||
|
||||
space->atomic_write_supported);
|
||||
ut_ad(space->pending_io());
|
||||
|
||||
rw_lock_t *rw_lock;
|
||||
|
||||
@@ -807,11 +812,6 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
||||
io_fix and oldest_modification()!=0. Thus, it cannot be relocated in
|
||||
the buffer pool or removed from flush_list or LRU_list. */
|
||||
|
||||
ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
|
||||
(space == fil_system.temp_space));
|
||||
ut_ad(space->purpose == FIL_TYPE_TABLESPACE ||
|
||||
space->atomic_write_supported);
|
||||
|
||||
DBUG_PRINT("ib_buf", ("%s %u page %u:%u",
|
||||
lru ? "LRU" : "flush_list",
|
||||
bpage->id().space(), bpage->id().page_no()));
|
||||
@@ -850,19 +850,22 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
||||
}
|
||||
}
|
||||
|
||||
if (status == buf_page_t::FREED)
|
||||
buf_release_freed_page(&block->page);
|
||||
else
|
||||
{
|
||||
space->reacquire_for_io();
|
||||
ut_ad(status == buf_page_t::NORMAL || status == buf_page_t::INIT_ON_FLUSH);
|
||||
size_t size, orig_size;
|
||||
ulint type= IORequest::WRITE;
|
||||
IORequest::Type type= lru ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC;
|
||||
|
||||
if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */
|
||||
{
|
||||
ut_ad(!space->full_crc32());
|
||||
ut_ad(!space->is_compressed()); /* not page_compressed */
|
||||
orig_size= size= bpage->zip_size();
|
||||
if (status != buf_page_t::FREED)
|
||||
{
|
||||
buf_flush_update_zip_checksum(frame, orig_size);
|
||||
buf_flush_update_zip_checksum(frame, size);
|
||||
frame= buf_page_encrypt(space, bpage, frame, &size);
|
||||
}
|
||||
ut_ad(size == bpage->zip_size());
|
||||
}
|
||||
else
|
||||
@@ -870,8 +873,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
||||
byte *page= block->frame;
|
||||
orig_size= size= block->physical_size();
|
||||
|
||||
if (status == buf_page_t::FREED);
|
||||
else if (space->full_crc32())
|
||||
if (space->full_crc32())
|
||||
{
|
||||
/* innodb_checksum_algorithm=full_crc32 is not implemented for
|
||||
ROW_FORMAT=COMPRESSED pages. */
|
||||
@@ -888,44 +890,26 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
||||
|
||||
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
|
||||
if (size != orig_size && space->punch_hole)
|
||||
type|= IORequest::PUNCH_HOLE;
|
||||
type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;
|
||||
#else
|
||||
DBUG_EXECUTE_IF("ignore_punch_hole",
|
||||
if (size != orig_size && space->punch_hole)
|
||||
type|= IORequest::PUNCH_HOLE;);
|
||||
type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;);
|
||||
#endif
|
||||
frame=page;
|
||||
}
|
||||
|
||||
IORequest request(type, bpage, lru);
|
||||
|
||||
ut_ad(status == bpage->status);
|
||||
|
||||
switch (status) {
|
||||
default:
|
||||
ut_ad(status == buf_page_t::FREED);
|
||||
buf_release_freed_page(bpage);
|
||||
break;
|
||||
case buf_page_t::NORMAL:
|
||||
if (space->use_doublewrite())
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
if (lru)
|
||||
buf_pool.n_flush_LRU++;
|
||||
else
|
||||
buf_pool.n_flush_list++;
|
||||
buf_dblwr.add_to_batch(bpage, lru, size);
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
case buf_page_t::INIT_ON_FLUSH:
|
||||
if (lru)
|
||||
buf_pool.n_flush_LRU++;
|
||||
if (status != buf_page_t::NORMAL || !space->use_doublewrite())
|
||||
space->io(IORequest(type, bpage),
|
||||
bpage->physical_offset(), size, frame, bpage);
|
||||
else
|
||||
buf_pool.n_flush_list++;
|
||||
/* FIXME: pass space to fil_io() */
|
||||
fil_io(request, false, bpage->id(), bpage->zip_size(), 0,
|
||||
bpage->physical_size(), frame, bpage);
|
||||
buf_dblwr.add_to_batch(space, IORequest(type, bpage), size);
|
||||
}
|
||||
|
||||
/* Increment the I/O operation count used for selecting LRU policy. */
|
||||
@@ -973,8 +957,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
|
||||
? static_cast<uint32_t>(s) : read_ahead;
|
||||
page_id_t low= id - (id.page_no() % buf_flush_area);
|
||||
page_id_t high= low + buf_flush_area;
|
||||
high.set_page_no(std::min(high.page_no(),
|
||||
static_cast<uint32_t>(space.committed_size - 1)));
|
||||
high.set_page_no(std::min(high.page_no(), space.last_page_number()));
|
||||
|
||||
if (!contiguous)
|
||||
{
|
||||
@@ -1018,13 +1001,12 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
|
||||
return i;
|
||||
}
|
||||
|
||||
MY_ATTRIBUTE((nonnull))
|
||||
/** Punch holes into, or write zeroes over, the freed page ranges of a
|
||||
tablespace, if space->punch_hole or innodb_immediate_scrub_data_uncompressed is set.
|
||||
@param[in] space tablespace which contains freed ranges
|
||||
@param[in] freed_ranges freed ranges of the page to be flushed */
|
||||
@param space tablespace which may contain ranges of freed pages */
|
||||
static void buf_flush_freed_pages(fil_space_t *space)
|
||||
{
|
||||
ut_ad(space != NULL);
|
||||
const bool punch_hole= space->punch_hole;
|
||||
if (!srv_immediate_scrub_data_uncompressed && !punch_hole)
|
||||
return;
|
||||
@@ -1043,27 +1025,24 @@ static void buf_flush_freed_pages(fil_space_t *space)
|
||||
|
||||
for (const auto &range : freed_ranges)
|
||||
{
|
||||
ulint page_size= space->zip_size();
|
||||
if (!page_size)
|
||||
page_size= srv_page_size;
|
||||
const ulint physical_size= space->physical_size();
|
||||
|
||||
if (punch_hole)
|
||||
{
|
||||
const auto len= (range.last - range.first + 1) * page_size;
|
||||
const page_id_t page_id(space->id, range.first);
|
||||
fil_io_t fio= fil_io(IORequestWrite, true, page_id, space->zip_size(),
|
||||
0, len, nullptr, nullptr, false, true);
|
||||
if (fio.node)
|
||||
fio.node->space->release_for_io();
|
||||
space->reacquire_for_io();
|
||||
space->io(IORequest(IORequest::PUNCH_RANGE),
|
||||
os_offset_t{range.first} * physical_size,
|
||||
(range.last - range.first + 1) * physical_size,
|
||||
nullptr);
|
||||
}
|
||||
else if (srv_immediate_scrub_data_uncompressed)
|
||||
{
|
||||
for (auto i= range.first; i <= range.last; i++)
|
||||
for (os_offset_t i= range.first; i <= range.last; i++)
|
||||
{
|
||||
const page_id_t page_id(space->id, i);
|
||||
fil_io(IORequestWrite, false, page_id, space->zip_size(), 0,
|
||||
space->zip_size() ? space->zip_size() : srv_page_size,
|
||||
const_cast<byte*>(field_ref_zero), nullptr, false, false);
|
||||
space->reacquire_for_io();
|
||||
space->io(IORequest(IORequest::WRITE_ASYNC),
|
||||
i * physical_size, physical_size,
|
||||
const_cast<byte*>(field_ref_zero));
|
||||
}
|
||||
}
|
||||
buf_pool.stat.n_pages_written+= (range.last - range.first + 1);
|
||||
@@ -1093,7 +1072,8 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
|
||||
ut_ad(page_id >= id);
|
||||
ut_ad(page_id < high);
|
||||
|
||||
for (ulint id_fold= id.fold(); id < high; ++id, ++id_fold)
|
||||
for (ulint id_fold= id.fold(); id < high && !space->is_stopping();
|
||||
++id, ++id_fold)
|
||||
{
|
||||
if (count + n_flushed >= n_to_flush)
|
||||
{
|
||||
@@ -1190,7 +1170,7 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
|
||||
@retval nullptr if the pages for this tablespace should be discarded */
|
||||
static fil_space_t *buf_flush_space(const uint32_t id)
|
||||
{
|
||||
fil_space_t *space= fil_space_acquire_for_io(id);
|
||||
fil_space_t *space= fil_space_t::get_for_io(id);
|
||||
if (space)
|
||||
buf_flush_freed_pages(space);
|
||||
return space;
|
||||
@@ -1204,6 +1184,37 @@ struct flush_counters_t
|
||||
ulint evicted;
|
||||
};
|
||||
|
||||
/** Try to discard a dirty page.
|
||||
The caller must hold buf_pool.mutex and must not hold
buf_pool.flush_list_mutex (both are asserted below). On success the page
is removed from the flush list and passed to buf_LRU_free_page(). If the
block latch cannot be acquired without waiting, nothing is done.
@param bpage dirty page whose tablespace is not accessible */
|
||||
static void buf_flush_discard_page(buf_page_t *bpage)
|
||||
{
|
||||
mysql_mutex_assert_owner(&buf_pool.mutex);
|
||||
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
|
||||
ut_ad(bpage->in_file());
|
||||
/* Only dirty pages (nonzero oldest_modification) are expected here. */
ut_ad(bpage->oldest_modification());
|
||||
|
||||
rw_lock_t *rw_lock;
|
||||
|
||||
/* Only a BUF_BLOCK_FILE_PAGE is treated as a buf_block_t that carries a
block latch; for any other state there is no latch to acquire. */
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
|
||||
rw_lock= nullptr;
|
||||
else
|
||||
{
|
||||
rw_lock= &reinterpret_cast<buf_block_t*>(bpage)->lock;
|
||||
/* Give up on this page rather than wait for the SX latch. */
if (!rw_lock_sx_lock_nowait(rw_lock, 0))
|
||||
return;
|
||||
}
|
||||
|
||||
/* NOTE(review): presumably this resets a FREED or INIT_ON_FLUSH status
before the page is dropped from the flush list; confirm. */
bpage->status= buf_page_t::NORMAL;
|
||||
/* The flush list is modified only while holding flush_list_mutex. */
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||
buf_flush_remove(bpage);
|
||||
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
||||
|
||||
/* Release the block latch (if one was taken) before freeing the page. */
if (rw_lock)
|
||||
rw_lock_sx_unlock(rw_lock);
|
||||
|
||||
buf_LRU_free_page(bpage, true);
|
||||
}
|
||||
|
||||
/** Flush dirty blocks from the end of the LRU list.
|
||||
@param max maximum number of blocks to make available in buf_pool.free
|
||||
@param n counts of flushed and evicted pages */
|
||||
@@ -1219,6 +1230,9 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
|
||||
const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
|
||||
? 0 : srv_flush_neighbors;
|
||||
fil_space_t *space= nullptr;
|
||||
uint32_t last_space_id= FIL_NULL;
|
||||
static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency");
|
||||
static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency");
|
||||
|
||||
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
|
||||
bpage && n->flushed + n->evicted < max &&
|
||||
@@ -1243,14 +1257,26 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
|
||||
const page_id_t page_id(bpage->id());
|
||||
const uint32_t space_id= page_id.space();
|
||||
if (!space || space->id != space_id)
|
||||
{
|
||||
if (last_space_id != space_id)
|
||||
{
|
||||
if (space)
|
||||
space->release_for_io();
|
||||
space= buf_flush_space(space_id);
|
||||
if (!space)
|
||||
continue;
|
||||
last_space_id= space_id;
|
||||
}
|
||||
if (neighbors && space->is_rotational())
|
||||
else
|
||||
ut_ad(!space);
|
||||
}
|
||||
else if (space->is_stopping())
|
||||
{
|
||||
space->release_for_io();
|
||||
space= nullptr;
|
||||
}
|
||||
|
||||
if (!space)
|
||||
buf_flush_discard_page(bpage);
|
||||
else if (neighbors && space->is_rotational())
|
||||
{
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
n->flushed+= buf_flush_try_neighbors(space, page_id, neighbors == 1,
|
||||
@@ -1328,6 +1354,9 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
|
||||
const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
|
||||
? 0 : srv_flush_neighbors;
|
||||
fil_space_t *space= nullptr;
|
||||
uint32_t last_space_id= FIL_NULL;
|
||||
static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency");
|
||||
static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency");
|
||||
|
||||
/* Start from the end of the list looking for a suitable block to be
|
||||
flushed. */
|
||||
@@ -1360,14 +1389,26 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
|
||||
const page_id_t page_id(bpage->id());
|
||||
const uint32_t space_id= page_id.space();
|
||||
if (!space || space->id != space_id)
|
||||
{
|
||||
if (last_space_id != space_id)
|
||||
{
|
||||
if (space)
|
||||
space->release_for_io();
|
||||
space= buf_flush_space(space_id);
|
||||
if (!space)
|
||||
continue;
|
||||
last_space_id= space_id;
|
||||
}
|
||||
if (neighbors && space->is_rotational())
|
||||
else
|
||||
ut_ad(!space);
|
||||
}
|
||||
else if (space->is_stopping())
|
||||
{
|
||||
space->release_for_io();
|
||||
space= nullptr;
|
||||
}
|
||||
|
||||
if (!space)
|
||||
buf_flush_discard_page(bpage);
|
||||
else if (neighbors && space->is_rotational())
|
||||
{
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
count+= buf_flush_try_neighbors(space, page_id, neighbors == 1,
|
||||
@@ -1476,10 +1517,9 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
|
||||
while not holding buf_pool.flush_list_mutex */
|
||||
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
|
||||
{
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
if (running)
|
||||
return 0;
|
||||
if (!running)
|
||||
mysql_cond_broadcast(cond);
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
return 0;
|
||||
}
|
||||
n_flush++;
|
||||
|
@@ -261,26 +261,23 @@ flag is cleared and the x-lock released by an i/o-handler thread.
|
||||
@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED
|
||||
if we are trying
|
||||
to read from a non-existent tablespace
|
||||
@param[in,out] space tablespace
|
||||
@param[in] sync true if synchronous aio is desired
|
||||
@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
|
||||
@param[in] unzip true=request uncompressed page
|
||||
@param[in] ignore whether to ignore out-of-bounds page_id
|
||||
@return 1 if a read request was queued, 0 if the page already resided
|
||||
in buf_pool, or if the page is in the doublewrite buffer blocks in
|
||||
which case it is never read into the pool, or if the tablespace does
|
||||
not exist or is being dropped */
|
||||
@return whether a read request was queued */
|
||||
static
|
||||
ulint
|
||||
bool
|
||||
buf_read_page_low(
|
||||
dberr_t* err,
|
||||
fil_space_t* space,
|
||||
bool sync,
|
||||
ulint mode,
|
||||
const page_id_t page_id,
|
||||
ulint zip_size,
|
||||
bool unzip,
|
||||
bool ignore = false)
|
||||
bool unzip)
|
||||
{
|
||||
buf_page_t* bpage;
|
||||
|
||||
@@ -290,17 +287,22 @@ buf_read_page_low(
|
||||
ib::error() << "Trying to read doublewrite buffer page "
|
||||
<< page_id;
|
||||
ut_ad(0);
|
||||
return(0);
|
||||
nothing_read:
|
||||
space->release_for_io();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id)) {
|
||||
if (sync) {
|
||||
} else if (trx_sys_hdr_page(page_id)
|
||||
|| ibuf_bitmap_page(page_id, zip_size)
|
||||
|| (!recv_no_ibuf_operations
|
||||
&& ibuf_page(page_id, zip_size, nullptr))) {
|
||||
|
||||
/* Trx sys header is so low in the latching order that we play
|
||||
safe and do not leave the i/o-completion to an asynchronous
|
||||
i/o-thread. Ibuf bitmap pages must always be read with
|
||||
i/o-thread. Change buffer pages must always be read with
|
||||
synchronous i/o, to make sure they do not get involved in
|
||||
thread deadlocks. */
|
||||
|
||||
sync = true;
|
||||
}
|
||||
|
||||
@@ -311,20 +313,19 @@ buf_read_page_low(
|
||||
bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip);
|
||||
|
||||
if (bpage == NULL) {
|
||||
goto nothing_read;
|
||||
}
|
||||
|
||||
return(0);
|
||||
ut_ad(bpage->in_file());
|
||||
|
||||
if (sync) {
|
||||
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
|
||||
}
|
||||
|
||||
DBUG_LOG("ib_buf",
|
||||
"read page " << page_id << " zip_size=" << zip_size
|
||||
<< " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
|
||||
|
||||
ut_ad(bpage->in_file());
|
||||
|
||||
if (sync) {
|
||||
thd_wait_begin(NULL, THD_WAIT_DISKIO);
|
||||
}
|
||||
|
||||
void* dst;
|
||||
|
||||
if (zip_size) {
|
||||
@@ -335,20 +336,18 @@ buf_read_page_low(
|
||||
dst = ((buf_block_t*) bpage)->frame;
|
||||
}
|
||||
|
||||
fil_io_t fio = fil_io(
|
||||
IORequestRead, sync, page_id, zip_size, 0,
|
||||
zip_size ? zip_size : srv_page_size,
|
||||
dst, bpage, ignore);
|
||||
const ulint len = zip_size ? zip_size : srv_page_size;
|
||||
|
||||
auto fio = space->io(IORequest(sync
|
||||
? IORequest::READ_SYNC
|
||||
: IORequest::READ_ASYNC),
|
||||
page_id.page_no() * len, len, dst, bpage);
|
||||
*err= fio.err;
|
||||
|
||||
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) {
|
||||
if (ignore || fio.err == DB_TABLESPACE_DELETED) {
|
||||
if (!sync || fio.err == DB_TABLESPACE_DELETED) {
|
||||
buf_pool.corrupted_evict(bpage);
|
||||
if (sync && fio.node) {
|
||||
fio.node->space->release_for_io();
|
||||
}
|
||||
return(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
ut_error;
|
||||
@@ -357,16 +356,16 @@ buf_read_page_low(
|
||||
if (sync) {
|
||||
thd_wait_end(NULL);
|
||||
|
||||
/* The i/o was already completed in fil_io() */
|
||||
/* The i/o was already completed in space->io() */
|
||||
*err = buf_page_read_complete(bpage, *fio.node);
|
||||
fio.node->space->release_for_io();
|
||||
space->release_for_io();
|
||||
|
||||
if (*err != DB_SUCCESS) {
|
||||
return(0);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Applies a random read-ahead in buf_pool if there are at least a threshold
|
||||
@@ -411,7 +410,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
|
||||
ulint count= 5 + buf_read_ahead_area / 8;
|
||||
const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
|
||||
page_id_t high= low + buf_read_ahead_area;
|
||||
high.set_page_no(std::min(high.page_no(), space->committed_size - 1));
|
||||
high.set_page_no(std::min(high.page_no(), space->last_page_number()));
|
||||
|
||||
/* Count how many blocks in the area have been recently accessed,
|
||||
that is, reside near the start of the LRU list. */
|
||||
@@ -427,10 +426,14 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
|
||||
goto read_ahead;
|
||||
}
|
||||
|
||||
no_read_ahead:
|
||||
space->release();
|
||||
return 0;
|
||||
|
||||
read_ahead:
|
||||
if (!space->acquire_for_io())
|
||||
goto no_read_ahead;
|
||||
|
||||
/* Read all the suitable blocks within the area */
|
||||
const ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
|
||||
|
||||
@@ -441,13 +444,16 @@ read_ahead:
|
||||
if (space->is_stopping())
|
||||
break;
|
||||
dberr_t err;
|
||||
count+= buf_read_page_low(&err, false, ibuf_mode, i, zip_size, false);
|
||||
space->reacquire_for_io();
|
||||
if (buf_read_page_low(&err, space, false, ibuf_mode, i, zip_size, false))
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count)
|
||||
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
|
||||
count, space->chain.start->name,
|
||||
low.page_no()));
|
||||
space->release_for_io();
|
||||
space->release();
|
||||
|
||||
/* Read ahead is considered one I/O operation for the purpose of
|
||||
@@ -472,41 +478,49 @@ after decryption normal page checksum does not match.
|
||||
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
|
||||
dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
|
||||
{
|
||||
dberr_t err = DB_SUCCESS;
|
||||
|
||||
ulint count = buf_read_page_low(
|
||||
&err, true, BUF_READ_ANY_PAGE, page_id, zip_size, false);
|
||||
|
||||
srv_stats.buf_pool_reads.add(count);
|
||||
|
||||
if (err == DB_TABLESPACE_DELETED) {
|
||||
fil_space_t *space= fil_space_acquire(page_id.space());
|
||||
if (!space)
|
||||
{
|
||||
ib::info() << "trying to read page " << page_id
|
||||
<< " in nonexisting or being-dropped tablespace";
|
||||
return DB_TABLESPACE_DELETED;
|
||||
}
|
||||
else if (!space->acquire_for_io())
|
||||
{
|
||||
ib::warn() << "unable to read " << page_id << " from tablespace "
|
||||
<< space->name;
|
||||
space->release();
|
||||
return DB_PAGE_CORRUPTED;
|
||||
}
|
||||
|
||||
/* Increment number of I/O operations used for LRU policy. */
|
||||
buf_LRU_stat_inc_io();
|
||||
space->release();
|
||||
|
||||
return(err);
|
||||
dberr_t err;
|
||||
if (buf_read_page_low(&err, space, true, BUF_READ_ANY_PAGE,
|
||||
page_id, zip_size, false))
|
||||
srv_stats.buf_pool_reads.add(1);
|
||||
|
||||
buf_LRU_stat_inc_io();
|
||||
return err;
|
||||
}
|
||||
|
||||
/** High-level function which reads a page asynchronously from a file to the
|
||||
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
||||
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
|
||||
released by the i/o-handler thread.
|
||||
@param[in,out] space tablespace
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
|
||||
@param[in] sync true if synchronous aio is desired */
|
||||
void
|
||||
buf_read_page_background(const page_id_t page_id, ulint zip_size, bool sync)
|
||||
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
|
||||
ulint zip_size, bool sync)
|
||||
{
|
||||
ulint count;
|
||||
dberr_t err;
|
||||
|
||||
count = buf_read_page_low(
|
||||
&err, sync,
|
||||
BUF_READ_ANY_PAGE,
|
||||
page_id, zip_size, false, true);
|
||||
if (buf_read_page_low(&err, space, sync, BUF_READ_ANY_PAGE,
|
||||
page_id, zip_size, false)) {
|
||||
srv_stats.buf_pool_reads.add(1);
|
||||
}
|
||||
|
||||
switch (err) {
|
||||
case DB_SUCCESS:
|
||||
@@ -528,8 +542,6 @@ buf_read_page_background(const page_id_t page_id, ulint zip_size, bool sync)
|
||||
<< page_id;
|
||||
}
|
||||
|
||||
srv_stats.buf_pool_reads.add(count);
|
||||
|
||||
/* We do not increment number of I/O operations used for LRU policy
|
||||
here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
|
||||
about evicting uncompressed version of compressed pages from the
|
||||
@@ -598,10 +610,19 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
|
||||
fil_space_t *space= fil_space_acquire(page_id.space());
|
||||
if (!space)
|
||||
return 0;
|
||||
if (high_1.page_no() >= space->committed_size)
|
||||
else
|
||||
{
|
||||
bool ok= space->acquire_for_io();
|
||||
space->release();
|
||||
if (!ok)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (high_1.page_no() > space->last_page_number())
|
||||
{
|
||||
/* The area is not whole. */
|
||||
space->release();
|
||||
fail:
|
||||
space->release_for_io();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -628,8 +649,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
|
||||
{
|
||||
hard_fail:
|
||||
hash_lock->read_unlock();
|
||||
space->release();
|
||||
return 0;
|
||||
goto fail;
|
||||
}
|
||||
const byte *f;
|
||||
switch (UNIV_EXPECT(bpage->state(), BUF_BLOCK_FILE_PAGE)) {
|
||||
@@ -661,7 +681,7 @@ hard_fail:
|
||||
if (id != new_low && id != new_high_1)
|
||||
/* This is not a border page of the area: return */
|
||||
goto hard_fail;
|
||||
if (new_high_1.page_no() >= space->committed_size)
|
||||
if (new_high_1.page_no() > space->last_page_number())
|
||||
/* The area is not whole */
|
||||
goto hard_fail;
|
||||
}
|
||||
@@ -671,8 +691,7 @@ failed:
|
||||
hash_lock->read_unlock();
|
||||
if (--count)
|
||||
continue;
|
||||
space->release();
|
||||
return 0;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
const unsigned accessed= bpage->is_accessed();
|
||||
@@ -702,7 +721,8 @@ failed:
|
||||
if (space->is_stopping())
|
||||
break;
|
||||
dberr_t err;
|
||||
count+= buf_read_page_low(&err, false, ibuf_mode, new_low, zip_size,
|
||||
space->reacquire_for_io();
|
||||
count+= buf_read_page_low(&err, space, false, ibuf_mode, new_low, zip_size,
|
||||
false);
|
||||
}
|
||||
|
||||
@@ -710,7 +730,7 @@ failed:
|
||||
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
|
||||
count, space->chain.start->name,
|
||||
new_low.page_no()));
|
||||
space->release();
|
||||
space->release_for_io();
|
||||
|
||||
/* Read ahead is considered one I/O operation for the purpose of
|
||||
LRU policy decision. */
|
||||
@@ -721,24 +741,19 @@ failed:
|
||||
}
|
||||
|
||||
/** Issues read requests for pages which recovery wants to read in.
|
||||
@param[in] sync true if the caller wants this function to wait
|
||||
for the highest address page to get read in, before this function returns
|
||||
@param[in] space_id tablespace id
|
||||
@param[in] page_nos array of page numbers to read, with the
|
||||
highest page number the last in the array
|
||||
@param[in] n number of page numbers in the array */
|
||||
void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
|
||||
ulint n)
|
||||
void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n)
|
||||
{
|
||||
fil_space_t* space = fil_space_get(space_id);
|
||||
fil_space_t* space = fil_space_t::get_for_io(space_id);
|
||||
|
||||
if (space == NULL) {
|
||||
/* The tablespace is missing: do nothing */
|
||||
if (!space) {
|
||||
/* The tablespace is missing or unreadable: do nothing */
|
||||
return;
|
||||
}
|
||||
|
||||
fil_space_open_if_needed(space);
|
||||
|
||||
const ulint zip_size = space->zip_size();
|
||||
|
||||
for (ulint i = 0; i < n; i++) {
|
||||
@@ -769,9 +784,10 @@ void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
|
||||
}
|
||||
|
||||
dberr_t err;
|
||||
buf_read_page_low(
|
||||
&err, sync && i + 1 == n,
|
||||
BUF_READ_ANY_PAGE, cur_page_id, zip_size, true);
|
||||
space->reacquire_for_io();
|
||||
buf_read_page_low(&err, space, false,
|
||||
BUF_READ_ANY_PAGE, cur_page_id, zip_size,
|
||||
true);
|
||||
|
||||
if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) {
|
||||
ib::error() << "Recovery failed to read or decrypt "
|
||||
@@ -779,5 +795,8 @@ void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
|
||||
}
|
||||
}
|
||||
|
||||
DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)", n));
|
||||
|
||||
DBUG_PRINT("ib_buf", ("recovery read (%u pages) for %s", n,
|
||||
space->chain.start->name));
|
||||
space->release_for_io();
|
||||
}
|
||||
|
@@ -951,7 +951,7 @@ void dict_drop_index_tree(btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
|
||||
if (fil_space_t* s = fil_space_acquire_silent(space_id)) {
|
||||
/* Ensure that the tablespace file exists
|
||||
in order to avoid a crash in buf_page_get_gen(). */
|
||||
if (s->size || fil_space_get_size(space_id)) {
|
||||
if (root_page_no < s->get_size()) {
|
||||
btr_free_if_exists(page_id_t(space_id, root_page_no),
|
||||
s->zip_size(),
|
||||
mach_read_from_8(ptr), mtr);
|
||||
|
@@ -2975,15 +2975,15 @@ err_exit:
|
||||
}
|
||||
|
||||
if (err == DB_SUCCESS && table->is_readable()) {
|
||||
if (table->space && !fil_space_get_size(table->space_id)) {
|
||||
const auto root = dict_table_get_first_index(table)->page;
|
||||
|
||||
if (root >= table->space->get_size()) {
|
||||
corrupted:
|
||||
table->corrupted = true;
|
||||
table->file_unreadable = true;
|
||||
err = DB_CORRUPTION;
|
||||
} else {
|
||||
const page_id_t page_id(
|
||||
table->space->id,
|
||||
dict_table_get_first_index(table)->page);
|
||||
const page_id_t page_id(table->space->id, root);
|
||||
mtr.start();
|
||||
buf_block_t* block = buf_page_get(
|
||||
page_id, table->space->zip_size(),
|
||||
|
@@ -975,8 +975,7 @@ static inline
|
||||
void
|
||||
fil_crypt_read_crypt_data(fil_space_t* space)
|
||||
{
|
||||
if (space->crypt_data || space->size
|
||||
|| !fil_space_get_size(space->id)) {
|
||||
if (space->crypt_data || space->size || !space->get_size()) {
|
||||
/* The encryption metadata has already been read, or
|
||||
the tablespace is not encrypted and the file has been
|
||||
opened already, or the file cannot be accessed,
|
||||
@@ -2246,16 +2245,10 @@ static void fil_crypt_rotation_list_fill()
|
||||
}
|
||||
|
||||
/* Ensure that crypt_data has been initialized. */
|
||||
if (!space->size) {
|
||||
ut_d(const fil_space_t* s=)
|
||||
fil_system.read_page0(space->id);
|
||||
ut_ad(!s || s == space);
|
||||
if (!space->size) {
|
||||
/* Page 0 was not loaded.
|
||||
Skip this tablespace. */
|
||||
if (!space->get_size()) {
|
||||
/* Page 0 was not loaded. Skip this tablespace. */
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
/* Skip ENCRYPTION!=DEFAULT tablespaces. */
|
||||
if (space->crypt_data
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -296,8 +296,6 @@ Datafile::read_first_page(bool read_only_mode)
|
||||
m_first_page = static_cast<byte*>(
|
||||
aligned_malloc(UNIV_PAGE_SIZE_MAX, srv_page_size));
|
||||
|
||||
constexpr IORequest request(IORequest::READ |
|
||||
IORequest::DISABLE_PARTIAL_IO_WARNINGS);
|
||||
dberr_t err = DB_ERROR;
|
||||
size_t page_size = UNIV_PAGE_SIZE_MAX;
|
||||
|
||||
@@ -308,7 +306,8 @@ Datafile::read_first_page(bool read_only_mode)
|
||||
ulint n_read = 0;
|
||||
|
||||
err = os_file_read_no_error_handling(
|
||||
request, m_handle, m_first_page, 0, page_size, &n_read);
|
||||
IORequestReadPartial, m_handle, m_first_page, 0,
|
||||
page_size, &n_read);
|
||||
|
||||
if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) {
|
||||
|
||||
|
@@ -130,7 +130,7 @@ Tablespace::open_or_create(bool is_temp)
|
||||
fsp_flags = FSP_FLAGS_PAGE_SSIZE();
|
||||
}
|
||||
|
||||
space = fil_space_create(
|
||||
space = fil_space_t::create(
|
||||
m_name, m_space_id, fsp_flags,
|
||||
is_temp
|
||||
? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
|
||||
|
@@ -906,13 +906,10 @@ SysTablespace::open_or_create(
|
||||
if (it != begin) {
|
||||
} else if (is_temp) {
|
||||
ut_ad(space_id() == SRV_TMP_SPACE_ID);
|
||||
space = fil_space_create(
|
||||
space = fil_space_t::create(
|
||||
name(), SRV_TMP_SPACE_ID, flags(),
|
||||
FIL_TYPE_TEMPORARY, NULL);
|
||||
|
||||
mutex_enter(&fil_system.mutex);
|
||||
fil_system.temp_space = space;
|
||||
mutex_exit(&fil_system.mutex);
|
||||
ut_ad(space == fil_system.temp_space);
|
||||
if (!space) {
|
||||
return DB_ERROR;
|
||||
}
|
||||
@@ -920,12 +917,10 @@ SysTablespace::open_or_create(
|
||||
ut_ad(space->full_crc32());
|
||||
} else {
|
||||
ut_ad(space_id() == TRX_SYS_SPACE);
|
||||
space = fil_space_create(
|
||||
space = fil_space_t::create(
|
||||
name(), TRX_SYS_SPACE, it->flags(),
|
||||
FIL_TYPE_TABLESPACE, NULL);
|
||||
mutex_enter(&fil_system.mutex);
|
||||
fil_system.sys_space = space;
|
||||
mutex_exit(&fil_system.mutex);
|
||||
ut_ad(space == fil_system.sys_space);
|
||||
if (!space) {
|
||||
return DB_ERROR;
|
||||
}
|
||||
|
@@ -7044,6 +7044,7 @@ i_s_tablespaces_encryption_fill_table(
|
||||
}
|
||||
|
||||
mutex_enter(&fil_system.mutex);
|
||||
fil_system.freeze_space_list++;
|
||||
|
||||
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
|
||||
space; space = UT_LIST_GET_NEXT(space_list, space)) {
|
||||
@@ -7060,6 +7061,7 @@ i_s_tablespaces_encryption_fill_table(
|
||||
}
|
||||
}
|
||||
|
||||
fil_system.freeze_space_list--;
|
||||
mutex_exit(&fil_system.mutex);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
@@ -2300,7 +2300,7 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
|
||||
|
||||
for (ulint i = 0; i < n_stored; i++) {
|
||||
const ulint space_id = space_ids[i];
|
||||
fil_space_t* s = fil_space_acquire_for_io(space_id);
|
||||
fil_space_t* s = fil_space_t::get_for_io(space_id);
|
||||
if (!s) {
|
||||
tablespace_deleted:
|
||||
/* The tablespace was not found: remove all
|
||||
@@ -4631,26 +4631,14 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
|
||||
|
||||
const unsigned zip_size = space->zip_size();
|
||||
const unsigned physical_size = space->physical_size();
|
||||
/* fil_space_t::size and fil_space_t::free_limit would still be 0
|
||||
at this point. So, we will have to read page 0. */
|
||||
ut_ad(!space->free_limit);
|
||||
ut_ad(!space->size);
|
||||
|
||||
uint32_t size= std::min(space->free_limit, space->size);
|
||||
|
||||
if (size == 0) {
|
||||
return(DB_TABLE_NOT_FOUND);
|
||||
}
|
||||
|
||||
mtr_t mtr;
|
||||
uint32_t size;
|
||||
mtr.start();
|
||||
if (buf_block_t* sp = buf_page_get(page_id_t(space->id, 0),
|
||||
zip_size,
|
||||
RW_S_LATCH, &mtr)) {
|
||||
size = std::min(
|
||||
mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
|
||||
+ sp->frame),
|
||||
mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
|
||||
+ sp->frame));
|
||||
} else {
|
||||
size = 0;
|
||||
}
|
||||
mtr.commit();
|
||||
|
||||
mutex_enter(&ibuf_mutex);
|
||||
|
||||
|
@@ -978,6 +978,15 @@ public:
|
||||
return zip.ssize ? (UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize : 0;
|
||||
}
|
||||
|
||||
/** Compute where this page starts in its file: the page number shifted
left by a size-dependent amount.
@return the byte offset of the page within a file */
|
||||
os_offset_t physical_offset() const
|
||||
{
|
||||
/* Copy the page number into os_offset_t first, so that the shift below
is evaluated in the offset type rather than in the page number's type. */
os_offset_t o= id().page_no();
|
||||
/* A nonzero zip.ssize selects the compressed-page shift; otherwise the
shift is srv_page_size_shift.
NOTE(review): the compressed shift presumably equals log2 of zip_size();
confirm that UNIV_ZIP_SIZE_MIN == 1 << UNIV_ZIP_SIZE_SHIFT_MIN. */
return zip.ssize
|
||||
? o << (zip.ssize + (UNIV_ZIP_SIZE_SHIFT_MIN - 1))
|
||||
: o << srv_page_size_shift;
|
||||
}
|
||||
|
||||
/** @return whether the block is mapped to a data file */
|
||||
bool in_file() const
|
||||
{
|
||||
|
@@ -52,10 +52,10 @@ class buf_dblwr_t
|
||||
|
||||
struct element
|
||||
{
|
||||
/** block descriptor */
|
||||
buf_page_t *bpage;
|
||||
/** true=buf_pool.flush_list, false=buf_pool.LRU */
|
||||
bool lru;
|
||||
/** tablespace */
|
||||
fil_space_t *space;
|
||||
/** asynchronous write request */
|
||||
IORequest request;
|
||||
/** payload size in bytes */
|
||||
size_t size;
|
||||
};
|
||||
@@ -103,10 +103,11 @@ public:
|
||||
|
||||
/** Schedule a page write. If the doublewrite memory buffer is full,
|
||||
flush_buffered_writes() will be invoked to make space.
|
||||
@param bpage buffer pool page to be written
|
||||
@param lru true=buf_pool.LRU; false=buf_pool.flush_list
|
||||
@param space tablespace
|
||||
@param request asynchronous write request
|
||||
@param size payload size in bytes */
|
||||
void add_to_batch(buf_page_t *bpage, bool lru, size_t size);
|
||||
void add_to_batch(fil_space_t *space, const IORequest &request,
|
||||
size_t size) MY_ATTRIBUTE((nonnull));
|
||||
|
||||
/** Determine whether the doublewrite buffer is initialized */
|
||||
bool is_initialised() const
|
||||
|
@@ -46,11 +46,13 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size);
|
||||
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
||||
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
|
||||
released by the i/o-handler thread.
|
||||
@param[in,out] space tablespace
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
|
||||
@param[in] sync true if synchronous aio is desired */
|
||||
void
|
||||
buf_read_page_background(const page_id_t page_id, ulint zip_size, bool sync);
|
||||
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
|
||||
ulint zip_size, bool sync)
|
||||
MY_ATTRIBUTE((nonnull));
|
||||
|
||||
/** Applies a random read-ahead in buf_pool if there are at least a threshold
|
||||
value of accessed pages from the random read-ahead area. Does not read any
|
||||
@@ -101,14 +103,11 @@ ulint
|
||||
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf);
|
||||
|
||||
/** Issues read requests for pages which recovery wants to read in.
|
||||
@param[in] sync true if the caller wants this function to wait
|
||||
for the highest address page to get read in, before this function returns
|
||||
@param[in] space_id tablespace id
|
||||
@param[in] page_nos array of page numbers to read, with the
|
||||
highest page number the last in the array
|
||||
@param[in] n number of page numbers in the array */
|
||||
void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
|
||||
ulint n);
|
||||
void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n);
|
||||
|
||||
/** @name Modes used in read-ahead @{ */
|
||||
/** read only pages belonging to the insert buffer tree */
|
||||
|
@@ -313,6 +313,25 @@ new_range:
|
||||
|
||||
/** Tablespace or log data space */
|
||||
#ifndef UNIV_INNOCHECKSUM
|
||||
struct fil_io_t
|
||||
{
|
||||
/** error code */
|
||||
dberr_t err;
|
||||
/** file; node->space->release_for_io() must follow IORequestRead call */
|
||||
fil_node_t *node;
|
||||
};
|
||||
|
||||
/** Tablespace encryption mode */
|
||||
enum fil_encryption_t
|
||||
{
|
||||
/** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
|
||||
FIL_ENCRYPTION_DEFAULT,
|
||||
/** Encrypted */
|
||||
FIL_ENCRYPTION_ON,
|
||||
/** Not encrypted */
|
||||
FIL_ENCRYPTION_OFF
|
||||
};
|
||||
|
||||
struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
|
||||
ilist_node<rotation_list_tag_t>
|
||||
#else
|
||||
@@ -348,8 +367,6 @@ struct fil_space_t
|
||||
/*!< recovered tablespace size in pages;
|
||||
0 if no size change was read from the redo log,
|
||||
or if the size change was implemented */
|
||||
/** the committed size of the tablespace in pages */
|
||||
Atomic_relaxed<uint32_t> committed_size;
|
||||
ulint n_reserved_extents;
|
||||
/*!< number of reserved free extents for
|
||||
ongoing operations like B-tree page split */
|
||||
@@ -357,28 +374,33 @@ struct fil_space_t
|
||||
the tablespace to disk; dropping of the
|
||||
tablespace is forbidden if this is positive */
|
||||
private:
|
||||
/** the committed size of the tablespace in pages */
|
||||
Atomic_relaxed<uint32_t> committed_size;
|
||||
/** Number of pending buffer pool operations accessing the
|
||||
tablespace without holding a table lock or dict_operation_lock
|
||||
S-latch that would prevent the table (and tablespace) from being
|
||||
dropped. An example is encryption key rotation.
|
||||
|
||||
The tablespace cannot be dropped while this is nonzero, or while
|
||||
fil_node_t::n_pending is nonzero.
|
||||
The tablespace cannot be dropped while this is nonzero.
|
||||
|
||||
The most significant bit contains the STOP_NEW_OPS flag. */
|
||||
Atomic_relaxed<size_t> n_pending_ops;
|
||||
Atomic_relaxed<uint32_t> n_pending_ops;
|
||||
/** Number of pending block read or write operations
|
||||
The tablespace object cannot be freed while this is nonzero,
|
||||
but it can be detached from fil_system.
|
||||
|
||||
The most significant bit contains the CLOSING flag. */
|
||||
std::atomic<uint32_t> n_pending_ios;
|
||||
|
||||
/** Flag in n_pending_ops that indicates that the tablespace is being
|
||||
deleted, and no further operations should be performed */
|
||||
static constexpr uint32_t STOP_NEW_OPS= ~(~uint32_t(0) >> 1);
|
||||
/** Flag in n_pending_ios that indicates that the tablespace is a candidate
|
||||
for being closed, and fil_node_t::is_open() can only be trusted after
|
||||
acquiring fil_system.mutex and resetting the flag */
|
||||
static constexpr uint32_t CLOSING= STOP_NEW_OPS;
|
||||
static constexpr uint32_t NOT_CLOSING= ~CLOSING;
|
||||
public:
|
||||
/** Number of pending block read or write operations
|
||||
(when a write is imminent or a read has recently completed).
|
||||
The tablespace object cannot be freed while this is nonzero,
|
||||
but it can be detached from fil_system.
|
||||
Note that fil_node_t::n_pending tracks actual pending I/O requests.
|
||||
Protected by fil_system.mutex and std::atomic. */
|
||||
std::atomic<ulint> n_pending_ios;
|
||||
rw_lock_t latch; /*!< latch protecting the file space storage
|
||||
allocation */
|
||||
UT_LIST_NODE_T(fil_space_t) named_spaces;
|
||||
@@ -484,9 +506,10 @@ public:
|
||||
/** @return whether the storage device is rotational (HDD, not SSD) */
|
||||
inline bool is_rotational() const;
|
||||
|
||||
/** Open each file. Only invoked on fil_system.temp_space.
|
||||
/** Open each file. Never invoked on .ibd files.
|
||||
@param create_new_db whether to skip the call to fil_node_t::read_page0()
|
||||
@return whether all files were opened */
|
||||
bool open();
|
||||
bool open(bool create_new_db);
|
||||
/** Close each file. Only invoked on fil_system.temp_space. */
|
||||
void close();
|
||||
|
||||
@@ -497,17 +520,13 @@ public:
|
||||
size_t referenced() const { return n_pending_ops & ~STOP_NEW_OPS; }
|
||||
|
||||
/** Note that operations on the tablespace must stop or can resume */
|
||||
void set_stopping(bool stopping)
|
||||
{
|
||||
ut_d(auto n=) n_pending_ops.fetch_xor(STOP_NEW_OPS);
|
||||
ut_ad(!(n & STOP_NEW_OPS) == stopping);
|
||||
}
|
||||
inline void set_stopping(bool stopping);
|
||||
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
/** @return whether a tablespace reference was successfully acquired */
|
||||
bool acquire()
|
||||
{
|
||||
size_t n= 0;
|
||||
uint32_t n= 0;
|
||||
while (!n_pending_ops.compare_exchange_strong(n, n + 1,
|
||||
std::memory_order_acquire,
|
||||
std::memory_order_relaxed))
|
||||
@@ -523,30 +542,41 @@ public:
|
||||
ut_ad(n & ~STOP_NEW_OPS);
|
||||
return (n & ~STOP_NEW_OPS) == 1;
|
||||
}
|
||||
/** Acquire a tablespace reference for I/O. */
|
||||
void acquire_for_io() { n_pending_ios++; }
|
||||
/** Release a tablespace reference for I/O. */
|
||||
void release_for_io() { ut_d(auto n=) n_pending_ios--; ut_ad(n); }
|
||||
/** @return whether I/O is pending */
|
||||
bool pending_io() const { return n_pending_ios; }
|
||||
|
||||
/** @return whether the tablespace file can be closed and reopened */
|
||||
bool belongs_in_lru() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
/** Acquire a tablespace reference for I/O.
|
||||
@return whether the file is usable */
|
||||
bool acquire_for_io()
|
||||
{
|
||||
switch (purpose) {
|
||||
case FIL_TYPE_TEMPORARY:
|
||||
ut_ad(id == SRV_TMP_SPACE_ID);
|
||||
return false;
|
||||
case FIL_TYPE_IMPORT:
|
||||
ut_ad(id != SRV_TMP_SPACE_ID);
|
||||
return true;
|
||||
case FIL_TYPE_TABLESPACE:
|
||||
ut_ad(id != SRV_TMP_SPACE_ID);
|
||||
return id && !srv_is_undo_tablespace(id);
|
||||
return UNIV_LIKELY(!(n_pending_ios.fetch_add(1, std::memory_order_acquire)&
|
||||
CLOSING)) ||
|
||||
prepare_for_io();
|
||||
}
|
||||
ut_ad(0);
|
||||
return false;
|
||||
|
||||
/** Acquire another tablespace reference for I/O. */
|
||||
inline void reacquire_for_io();
|
||||
|
||||
/** Release a tablespace reference for I/O. */
|
||||
void release_for_io()
|
||||
{
|
||||
ut_d(uint32_t n=) n_pending_ios.fetch_sub(1, std::memory_order_release);
|
||||
ut_ad(n & NOT_CLOSING);
|
||||
}
|
||||
/** @return number of pending reads or writes */
|
||||
uint32_t pending_io() const
|
||||
{ return n_pending_ios.load(std::memory_order_acquire) & NOT_CLOSING; }
|
||||
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
/** Prepare to close the file handle.
|
||||
@return number of pending operations */
|
||||
uint32_t set_closing()
|
||||
{
|
||||
return n_pending_ios.fetch_or(CLOSING, std::memory_order_acquire) &
|
||||
NOT_CLOSING;
|
||||
}
|
||||
/** @return whether close() of the file handle has been requested */
|
||||
bool is_closing() const
|
||||
{ return n_pending_ios.load(std::memory_order_acquire) & CLOSING; }
|
||||
|
||||
/** @return last_freed_lsn */
|
||||
lsn_t get_last_freed_lsn() { return last_freed_lsn; }
|
||||
@@ -835,6 +865,25 @@ public:
|
||||
}
|
||||
|
||||
#ifndef UNIV_INNOCHECKSUM
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
/** Create a tablespace in fil_system.
|
||||
@param name tablespace name
|
||||
@param id tablespace identifier
|
||||
@param flags tablespace flags
|
||||
@param purpose tablespace purpose
|
||||
@param crypt_data encryption information
|
||||
@param mode encryption mode
|
||||
@return pointer to created tablespace, to be filled in with add()
|
||||
@retval nullptr on failure (such as when the same tablespace exists) */
|
||||
static fil_space_t *create(const char *name, ulint id, ulint flags,
|
||||
fil_type_t purpose, fil_space_crypt_t *crypt_data,
|
||||
fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT);
|
||||
|
||||
/** Acquire a tablespace for reading or writing a block.
|
||||
@param id tablespace ID
|
||||
@return the tablespace, or nullptr if missing or inaccessible */
|
||||
static fil_space_t *get_for_io(ulint id);
|
||||
|
||||
/** Add/remove the free page in the freed ranges list.
|
||||
@param[in] offset page number to be added
|
||||
@param[in] free true if page to be freed */
|
||||
@@ -863,8 +912,47 @@ public:
|
||||
std::lock_guard<std::mutex> freed_lock(freed_range_mutex);
|
||||
freed_ranges.add_range(range);
|
||||
}
|
||||
#endif /*!UNIV_INNOCHECKSUM */
|
||||
|
||||
/** Set the tablespace size in pages */
|
||||
void set_sizes(uint32_t s)
|
||||
{
|
||||
ut_ad(id ? !size : (size >= s));
|
||||
size= s; committed_size= s;
|
||||
}
|
||||
|
||||
/** Update committed_size in mtr_t::commit() */
|
||||
void set_committed_size()
|
||||
{
|
||||
ut_ad(rw_lock_own(&latch, RW_LOCK_X));
|
||||
committed_size= size;
|
||||
}
|
||||
|
||||
/** @return the last persisted page number */
|
||||
uint32_t last_page_number() const { return committed_size - 1; }
|
||||
|
||||
/** @return the size in pages (0 if unreadable) */
|
||||
inline uint32_t get_size();
|
||||
|
||||
/** Read or write data.
|
||||
@param type I/O context
|
||||
@param offset offset in bytes
|
||||
@param len number of bytes
|
||||
@param buf the data to be read or written
|
||||
@param bpage buffer block (for type.is_async() completion callback)
|
||||
@return status and file descriptor */
|
||||
fil_io_t io(const IORequest &type, os_offset_t offset, size_t len,
|
||||
void *buf, buf_page_t *bpage= nullptr);
|
||||
/** Flush pending writes from the file system cache to the file */
|
||||
void flush();
|
||||
|
||||
/** Read the first page of a data file.
|
||||
@return whether the page was found valid */
|
||||
bool read_page0();
|
||||
|
||||
private:
|
||||
/** @return whether the file is usable for io() */
|
||||
ATTRIBUTE_COLD bool prepare_for_io();
|
||||
#endif /*!UNIV_INNOCHECKSUM */
|
||||
};
|
||||
|
||||
#ifndef UNIV_INNOCHECKSUM
|
||||
@@ -892,8 +980,6 @@ struct fil_node_t {
|
||||
uint32_t init_size;
|
||||
/** maximum size of the file in database pages (0 if unlimited) */
|
||||
uint32_t max_size;
|
||||
/** count of pending i/o's; is_open must be true if nonzero */
|
||||
ulint n_pending;
|
||||
/** count of pending flushes; is_open must be true if nonzero */
|
||||
ulint n_pending_flushes;
|
||||
/** whether the file is currently being extended */
|
||||
@@ -902,8 +988,6 @@ struct fil_node_t {
|
||||
bool needs_flush;
|
||||
/** link to other files in this tablespace */
|
||||
UT_LIST_NODE_T(fil_node_t) chain;
|
||||
/** link to the fil_system.LRU list (keeping track of open files) */
|
||||
UT_LIST_NODE_T(fil_node_t) LRU;
|
||||
|
||||
/** whether this file could use atomic write (data file) */
|
||||
bool atomic_write;
|
||||
@@ -921,9 +1005,8 @@ struct fil_node_t {
|
||||
}
|
||||
|
||||
/** Read the first page of a data file.
|
||||
@param[in] first whether this is the very first read
|
||||
@return whether the page was found valid */
|
||||
bool read_page0(bool first);
|
||||
bool read_page0();
|
||||
|
||||
/** Determine some file metadata when creating or reading the file.
|
||||
@param file the file that is being created, or OS_FILE_CLOSED */
|
||||
@@ -942,8 +1025,8 @@ struct fil_node_t {
|
||||
@return detached handle or OS_FILE_CLOSED */
|
||||
pfs_os_file_t close_to_free(bool detach_handle= false);
|
||||
|
||||
/** Update the data structures on I/O completion */
|
||||
inline void complete_io(bool write= false);
|
||||
/** Update the data structures on write completion */
|
||||
inline void complete_write();
|
||||
|
||||
private:
|
||||
/** Does stuff common for close() and detach() */
|
||||
@@ -953,6 +1036,13 @@ private:
|
||||
/** Value of fil_node_t::magic_n */
|
||||
#define FIL_NODE_MAGIC_N 89389
|
||||
|
||||
inline void fil_space_t::reacquire_for_io()
|
||||
{
|
||||
ut_d(uint32_t n=) n_pending_ios.fetch_add(1, std::memory_order_relaxed);
|
||||
ut_ad(n & NOT_CLOSING);
|
||||
ut_ad(UT_LIST_GET_FIRST(chain)->is_open());
|
||||
}
|
||||
|
||||
inline void fil_space_t::set_imported()
|
||||
{
|
||||
ut_ad(purpose == FIL_TYPE_IMPORT);
|
||||
@@ -963,11 +1053,9 @@ inline void fil_space_t::set_imported()
|
||||
inline bool fil_space_t::is_rotational() const
|
||||
{
|
||||
for (const fil_node_t *node= UT_LIST_GET_FIRST(chain); node;
|
||||
node = UT_LIST_GET_NEXT(chain, node)) {
|
||||
if (!node->on_ssd) {
|
||||
node= UT_LIST_GET_NEXT(chain, node))
|
||||
if (!node->on_ssd)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1179,16 +1267,6 @@ index */
|
||||
#define fil_page_index_page_check(page) \
|
||||
fil_page_type_is_index(fil_page_get_type(page))
|
||||
|
||||
/** Enum values for encryption table option */
|
||||
enum fil_encryption_t {
|
||||
/** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
|
||||
FIL_ENCRYPTION_DEFAULT,
|
||||
/** Encrypted */
|
||||
FIL_ENCRYPTION_ON,
|
||||
/** Not encrypted */
|
||||
FIL_ENCRYPTION_OFF
|
||||
};
|
||||
|
||||
/** Get the file page type.
|
||||
@param[in] page file page
|
||||
@return page type */
|
||||
@@ -1227,7 +1305,6 @@ struct fil_system_t {
|
||||
*/
|
||||
fil_system_t(): m_initialised(false)
|
||||
{
|
||||
UT_LIST_INIT(LRU, &fil_node_t::LRU);
|
||||
UT_LIST_INIT(space_list, &fil_space_t::space_list);
|
||||
UT_LIST_INIT(named_spaces, &fil_space_t::named_spaces);
|
||||
}
|
||||
@@ -1275,30 +1352,23 @@ public:
|
||||
fil_space_t* temp_space; /*!< The innodb_temporary tablespace */
|
||||
/** Map of fil_space_t::id to fil_space_t* */
|
||||
hash_table_t spaces;
|
||||
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
|
||||
/*!< base node for the LRU list of the
|
||||
most recently used open files with no
|
||||
pending i/o's; if we start an i/o on
|
||||
the file, we first remove it from this
|
||||
list, and return it to the start of
|
||||
the list when the i/o ends;
|
||||
log files and the system tablespace are
|
||||
not put to this list: they are opened
|
||||
after the startup, and kept open until
|
||||
shutdown */
|
||||
sized_ilist<fil_space_t, unflushed_spaces_tag_t> unflushed_spaces;
|
||||
/*!< list of those
|
||||
tablespaces whose files contain
|
||||
unflushed writes; those spaces have
|
||||
at least one file node where
|
||||
needs_flush == true */
|
||||
ulint n_open; /*!< number of files currently open */
|
||||
/** number of currently open files; protected by mutex */
|
||||
ulint n_open;
|
||||
ulint max_assigned_id;/*!< maximum space id in the existing
|
||||
tables, or assigned during the time
|
||||
mysqld has been up; at an InnoDB
|
||||
startup we scan the data dictionary
|
||||
and set here the maximum of the
|
||||
space id's of the tables there */
|
||||
/** nonzero if fil_node_open_file_low() should avoid moving the tablespace
|
||||
to the end of space_list, for FIFO policy of try_to_close() */
|
||||
ulint freeze_space_list;
|
||||
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
|
||||
/*!< list of all file spaces */
|
||||
UT_LIST_BASE_NODE_T(fil_space_t) named_spaces;
|
||||
@@ -1312,16 +1382,10 @@ public:
|
||||
key rotation.*/
|
||||
|
||||
bool space_id_reuse_warned;
|
||||
/*!< whether fil_space_create()
|
||||
/*!< whether fil_space_t::create()
|
||||
has issued a warning about
|
||||
potential space_id reuse */
|
||||
|
||||
/** Trigger a call to fil_node_t::read_page0()
|
||||
@param[in] id tablespace identifier
|
||||
@return tablespace
|
||||
@retval NULL if the tablespace does not exist or cannot be read */
|
||||
fil_space_t* read_page0(ulint id);
|
||||
|
||||
/** Return the next tablespace from rotation_list.
|
||||
@param space previous tablespace (NULL to start from the start)
|
||||
@param recheck whether the removal condition needs to be rechecked after
|
||||
@@ -1336,63 +1400,28 @@ public:
|
||||
/** The tablespace memory cache. */
|
||||
extern fil_system_t fil_system;
|
||||
|
||||
/** Update the data structures on I/O completion */
|
||||
inline void fil_node_t::complete_io(bool write)
|
||||
/** Note that operations on the tablespace must stop or can resume */
|
||||
inline void fil_space_t::set_stopping(bool stopping)
|
||||
{
|
||||
ut_ad(mutex_own(&fil_system.mutex));
|
||||
|
||||
if (write)
|
||||
{
|
||||
if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
|
||||
{
|
||||
/* We don't need to keep track of unflushed changes as user has
|
||||
explicitly disabled buffering. */
|
||||
ut_ad(!space->is_in_unflushed_spaces);
|
||||
ut_ad(!needs_flush);
|
||||
}
|
||||
else if (!space->is_stopping())
|
||||
{
|
||||
needs_flush= true;
|
||||
if (!space->is_in_unflushed_spaces)
|
||||
{
|
||||
space->is_in_unflushed_spaces= true;
|
||||
fil_system.unflushed_spaces.push_front(*space);
|
||||
}
|
||||
}
|
||||
ut_d(auto n=) n_pending_ops.fetch_xor(STOP_NEW_OPS);
|
||||
ut_ad(!(n & STOP_NEW_OPS) == stopping);
|
||||
}
|
||||
|
||||
switch (n_pending--) {
|
||||
case 0:
|
||||
ut_error;
|
||||
case 1:
|
||||
if (space->belongs_in_lru())
|
||||
/* The node must be put back to the LRU list */
|
||||
UT_LIST_ADD_FIRST(fil_system.LRU, this);
|
||||
/** @return the size in pages (0 if unreadable) */
|
||||
inline uint32_t fil_space_t::get_size()
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
mutex_enter(&fil_system.mutex);
|
||||
read_page0();
|
||||
mutex_exit(&fil_system.mutex);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
#include "fil0crypt.h"
|
||||
|
||||
/** Create a space memory object and put it to the fil_system hash table.
|
||||
Error messages are issued to the server log.
|
||||
@param[in] name tablespace name
|
||||
@param[in] id tablespace identifier
|
||||
@param[in] flags tablespace flags
|
||||
@param[in] purpose tablespace purpose
|
||||
@param[in,out] crypt_data encryption information
|
||||
@param[in] mode encryption mode
|
||||
@return pointer to created tablespace, to be filled in with fil_space_t::add()
|
||||
@retval NULL on failure (such as when the same tablespace exists) */
|
||||
fil_space_t*
|
||||
fil_space_create(
|
||||
const char* name,
|
||||
ulint id,
|
||||
ulint flags,
|
||||
fil_type_t purpose,
|
||||
fil_space_crypt_t* crypt_data,
|
||||
fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT)
|
||||
MY_ATTRIBUTE((warn_unused_result));
|
||||
|
||||
/*******************************************************************//**
|
||||
Assigns a new space id for a new single-table tablespace. This works simply by
|
||||
incrementing the global counter. If 4 billion id's is not enough, we may need
|
||||
@@ -1421,21 +1450,6 @@ fil_space_free(
|
||||
void fil_space_set_recv_size_and_flags(ulint id, uint32_t size,
|
||||
uint32_t flags);
|
||||
|
||||
/*******************************************************************//**
|
||||
Returns the size of the space in pages. The tablespace must be cached in the
|
||||
memory cache.
|
||||
@return space size, 0 if space not found */
|
||||
ulint
|
||||
fil_space_get_size(
|
||||
/*===============*/
|
||||
ulint id); /*!< in: space id */
|
||||
|
||||
/** Opens all system tablespace data files. They stay open until the
|
||||
database server shutdown. This should be called at a server startup after the
|
||||
space objects for the system tablespace have been created. The
|
||||
purpose of this operation is to make sure we never run out of file descriptors
|
||||
if we need to read from the insert buffer. */
|
||||
void fil_open_system_tablespace_files();
|
||||
/** Close all tablespace files at shutdown */
|
||||
void fil_close_all_files();
|
||||
/*******************************************************************//**
|
||||
@@ -1491,14 +1505,6 @@ fil_space_acquire_silent(ulint id)
|
||||
return (fil_space_acquire_low(id, true));
|
||||
}
|
||||
|
||||
/** Acquire a tablespace for reading or writing a block,
|
||||
when it could be dropped concurrently.
|
||||
@param[in] id tablespace ID
|
||||
@return the tablespace
|
||||
@retval NULL if missing */
|
||||
fil_space_t*
|
||||
fil_space_acquire_for_io(ulint id);
|
||||
|
||||
/** Replay a file rename operation if possible.
|
||||
@param[in] space_id tablespace identifier
|
||||
@param[in] name old file name
|
||||
@@ -1674,7 +1680,7 @@ fil_file_readdir_next_file(
|
||||
memory cache. Note that if we have not done a crash recovery at the database
|
||||
startup, there may be many tablespaces which are not yet in the memory cache.
|
||||
@param[in] id Tablespace ID
|
||||
@param[in] name Tablespace name used in fil_space_create().
|
||||
@param[in] name Tablespace name used in fil_space_t::create().
|
||||
@param[in] table_flags table flags
|
||||
@return the tablespace
|
||||
@retval NULL if no matching tablespace exists in the memory cache */
|
||||
@@ -1690,70 +1696,6 @@ fil_space_for_table_exists_in_mem(
|
||||
@return whether the tablespace is at least as big as requested */
|
||||
bool fil_space_extend(fil_space_t *space, uint32_t size);
|
||||
|
||||
struct fil_io_t
|
||||
{
|
||||
/** error code */
|
||||
dberr_t err;
|
||||
/** file; node->space->release_for_io() must follow fil_io(sync=true) call */
|
||||
fil_node_t *node;
|
||||
};
|
||||
|
||||
/** Reads or writes data. This operation could be asynchronous (aio).
|
||||
|
||||
@param[in] type IO context
|
||||
@param[in] sync true if synchronous aio is desired
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
|
||||
@param[in] byte_offset remainder of offset in bytes; in aio this
|
||||
must be divisible by the OS block size
|
||||
@param[in] len how many bytes to read or write; this must
|
||||
not cross a file boundary; in aio this must
|
||||
be a block size multiple
|
||||
@param[in,out] buf buffer where to store read data or from where
|
||||
to write; in aio this must be appropriately
|
||||
aligned
|
||||
@param[in] message message for aio handler if non-sync aio
|
||||
used, else ignored
|
||||
@param[in] ignore whether to ignore errors
|
||||
@param[in] punch_hole punch the hole to the file for page_compressed
|
||||
tablespace
|
||||
@return status and file descriptor */
|
||||
fil_io_t
|
||||
fil_io(
|
||||
const IORequest& type,
|
||||
bool sync,
|
||||
const page_id_t page_id,
|
||||
ulint zip_size,
|
||||
ulint byte_offset,
|
||||
ulint len,
|
||||
void* buf,
|
||||
void* message,
|
||||
bool ignore = false,
|
||||
bool punch_hole = false);
|
||||
|
||||
/**********************************************************************//**
|
||||
Waits for an aio operation to complete. This function is used to write the
|
||||
handler for completed requests. The aio array of pending requests is divided
|
||||
into segments (see os0file.cc for more info). The thread specifies which
|
||||
segment it wants to wait for. */
|
||||
void
|
||||
fil_aio_wait(
|
||||
/*=========*/
|
||||
ulint segment); /*!< in: the number of the segment in the aio
|
||||
array to wait for */
|
||||
/**********************************************************************//**
|
||||
Flushes to disk possible writes cached by the OS. If the space does not exist
|
||||
or is being dropped, does not do anything. */
|
||||
void
|
||||
fil_flush(
|
||||
/*======*/
|
||||
ulint space_id); /*!< in: file space id (this can be a group of
|
||||
log files or a tablespace of the database) */
|
||||
/** Flush a tablespace.
|
||||
@param[in,out] space tablespace to flush */
|
||||
void
|
||||
fil_flush(fil_space_t* space);
|
||||
|
||||
/** Flush to disk the writes in file spaces of the given type
|
||||
possibly cached by the OS. */
|
||||
void fil_flush_file_spaces();
|
||||
@@ -1846,23 +1788,6 @@ inline bool fil_names_write_if_was_clean(fil_space_t* space)
|
||||
return(was_clean);
|
||||
}
|
||||
|
||||
/** During crash recovery, open a tablespace if it had not been opened
|
||||
yet, to get valid size and flags.
|
||||
@param[in,out] space tablespace */
|
||||
inline void fil_space_open_if_needed(fil_space_t* space)
|
||||
{
|
||||
ut_ad(recv_recovery_is_on());
|
||||
|
||||
if (space->size == 0) {
|
||||
/* Initially, size and flags will be set to 0,
|
||||
until the files are opened for the first time.
|
||||
fil_space_get_size() will open the file
|
||||
and adjust the size and flags. */
|
||||
ut_d(ulint size =) fil_space_get_size(space->id);
|
||||
ut_ad(size == space->size);
|
||||
}
|
||||
}
|
||||
|
||||
/** On a log checkpoint, reset fil_names_dirty_and_write() flags
|
||||
and write out FILE_MODIFY and FILE_CHECKPOINT if needed.
|
||||
@param[in] lsn checkpoint LSN
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
||||
Copyright (c) 2014, 2019, MariaDB Corporation.
|
||||
Copyright (c) 2014, 2020, MariaDB Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
@@ -24,9 +24,7 @@ File space management types
|
||||
Created May 26, 2009 Vasil Dimov
|
||||
*******************************************************/
|
||||
|
||||
#ifndef fsp0types_h
|
||||
#define fsp0types_h
|
||||
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
|
||||
/** The fil_space_t::id of the redo log. All persistent tablespaces
|
||||
@@ -402,4 +400,6 @@ in full crc32 format. */
|
||||
|
||||
/* @} */
|
||||
|
||||
#endif /* fsp0types_h */
|
||||
struct fil_node_t;
|
||||
struct fil_space_t;
|
||||
class buf_page_t;
|
||||
|
@@ -1,48 +0,0 @@
|
||||
/***********************************************************************
|
||||
|
||||
Copyright (c) 2017, 2019, MariaDB Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
||||
|
||||
***********************************************************************/
|
||||
|
||||
/**************************************************//**
|
||||
@file os0api.h
|
||||
The interface to the helper functions.
|
||||
These functions are used on os0file.h where
|
||||
including full full header is not feasible and
|
||||
implemented on buf0buf.cc and fil0fil.cc.
|
||||
*******************************************************/
|
||||
|
||||
#ifndef OS_API_H
|
||||
#define OS_API_H 1
|
||||
|
||||
/** Page control block */
|
||||
class buf_page_t;
|
||||
|
||||
/** File Node */
|
||||
struct fil_node_t;
|
||||
|
||||
/**
|
||||
Calculate the length of trim (punch_hole) operation.
|
||||
@param[in] bpage Page control block
|
||||
@param[in] write_length Write length
|
||||
@return length of the trim or zero. */
|
||||
ulint
|
||||
buf_page_get_trim_length(
|
||||
const buf_page_t* bpage,
|
||||
ulint write_length)
|
||||
MY_ATTRIBUTE((warn_unused_result));
|
||||
|
||||
#endif /* OS_API_H */
|
@@ -37,7 +37,6 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#define os0file_h
|
||||
|
||||
#include "fsp0types.h"
|
||||
#include "os0api.h"
|
||||
#include "tpool.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
@@ -46,10 +45,6 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#include <time.h>
|
||||
#endif /* !_WIN32 */
|
||||
|
||||
/** File node of a tablespace or the log data space */
|
||||
struct fil_node_t;
|
||||
struct fil_space_t;
|
||||
|
||||
extern bool os_has_said_disk_full;
|
||||
|
||||
/** File offset in bytes */
|
||||
@@ -188,117 +183,75 @@ The I/O context that is passed down to the low level IO code */
|
||||
class IORequest
|
||||
{
|
||||
public:
|
||||
constexpr IORequest(ulint type= READ, buf_page_t *bpage= nullptr,
|
||||
bool lru= false) :
|
||||
m_bpage(bpage), m_type(static_cast<uint16_t>(type)), m_LRU(lru) {}
|
||||
|
||||
/** Flags passed in the request, they can be ORred together. */
|
||||
enum {
|
||||
READ = 1,
|
||||
WRITE = 2,
|
||||
|
||||
/** Double write buffer recovery. */
|
||||
DBLWR_RECOVER = 4,
|
||||
|
||||
/** Enumarations below can be ORed to READ/WRITE above*/
|
||||
|
||||
/** Data file */
|
||||
DATA_FILE = 8,
|
||||
|
||||
/** Disable partial read warnings */
|
||||
DISABLE_PARTIAL_IO_WARNINGS = 32,
|
||||
|
||||
/** Use punch hole if available*/
|
||||
PUNCH_HOLE = 64,
|
||||
enum Type
|
||||
{
|
||||
/** Synchronous read */
|
||||
READ_SYNC= 2,
|
||||
/** Asynchronous read; some errors will be ignored */
|
||||
READ_ASYNC= READ_SYNC | 1,
|
||||
/** Possibly partial read; only used with
|
||||
os_file_read_no_error_handling() */
|
||||
READ_MAYBE_PARTIAL= READ_SYNC | 4,
|
||||
/** Read for doublewrite buffer recovery */
|
||||
DBLWR_RECOVER= READ_SYNC | 8,
|
||||
/** Synchronous write */
|
||||
WRITE_SYNC= 16,
|
||||
/** Asynchronous write */
|
||||
WRITE_ASYNC= WRITE_SYNC | 1,
|
||||
/** Write data; evict the block on write completion */
|
||||
WRITE_LRU= WRITE_ASYNC | 32,
|
||||
/** Write data and punch hole for the rest */
|
||||
PUNCH= WRITE_ASYNC | 64,
|
||||
/** Write data and punch hole; evict the block on write completion */
|
||||
PUNCH_LRU= PUNCH | WRITE_LRU,
|
||||
/** Zero out a range of bytes in fil_space_t::io() */
|
||||
PUNCH_RANGE= WRITE_SYNC | 128,
|
||||
};
|
||||
|
||||
/** @return true if it is a read request */
|
||||
bool is_read() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
{
|
||||
return((m_type & READ) == READ);
|
||||
}
|
||||
constexpr IORequest(Type type= READ_SYNC, buf_page_t *bpage= nullptr) :
|
||||
bpage(bpage), type(type) {}
|
||||
|
||||
/** @return true if it is a write request */
|
||||
bool is_write() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
{
|
||||
return((m_type & WRITE) == WRITE);
|
||||
}
|
||||
constexpr IORequest(const IORequest &old, fil_node_t *node= nullptr) :
|
||||
bpage(old.bpage), node(node), type(old.type) {}
|
||||
|
||||
/** @return true if partial read warning disabled */
|
||||
bool is_partial_io_warning_disabled() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
{
|
||||
return !!(m_type & DISABLE_PARTIAL_IO_WARNINGS);
|
||||
}
|
||||
bool is_read() const { return (type & READ_SYNC) != 0; }
|
||||
bool is_write() const { return (type & WRITE_SYNC) != 0; }
|
||||
bool is_LRU() const { return (type & (WRITE_LRU ^ WRITE_ASYNC)) != 0; }
|
||||
bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; }
|
||||
|
||||
/** @return true if punch hole should be used */
|
||||
bool punch_hole() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
{
|
||||
return((m_type & PUNCH_HOLE) == PUNCH_HOLE);
|
||||
}
|
||||
|
||||
/** @return true if the read should be validated */
|
||||
bool validate() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
{
|
||||
return(is_read() ^ is_write());
|
||||
}
|
||||
|
||||
/** Set the pointer to file node for IO
|
||||
@param[in] node File node */
|
||||
void set_fil_node(fil_node_t *node) { m_fil_node= node; }
|
||||
|
||||
bool operator==(const IORequest& rhs) const
|
||||
{
|
||||
return(m_type == rhs.m_type);
|
||||
}
|
||||
|
||||
/** @return true if the request is from the dblwr recovery */
|
||||
bool is_dblwr_recover() const
|
||||
MY_ATTRIBUTE((warn_unused_result))
|
||||
{
|
||||
return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
|
||||
}
|
||||
|
||||
ulint get_trim_length(ulint write_length) const
|
||||
{
|
||||
return (m_bpage ?
|
||||
buf_page_get_trim_length(m_bpage, write_length)
|
||||
: 0);
|
||||
}
|
||||
|
||||
inline bool should_punch_hole() const;
|
||||
|
||||
/** Free storage space associated with a section of the file.
|
||||
@param[in] fh Open file handle
|
||||
@param[in] off Starting offset (SEEK_SET)
|
||||
@param[in] len Size of the hole
|
||||
/** If requested, free storage space associated with a section of the file.
|
||||
@param off byte offset from the start (SEEK_SET)
|
||||
@param len size of the hole in bytes
|
||||
@return DB_SUCCESS or error code */
|
||||
dberr_t punch_hole(os_file_t fh, os_offset_t off, ulint len);
|
||||
|
||||
/** @return type of page flush (for writes) */
|
||||
bool is_LRU() const { return m_LRU; }
|
||||
dberr_t maybe_punch_hole(os_offset_t off, ulint len)
|
||||
{
|
||||
return off && len && node && (type & (PUNCH ^ WRITE_ASYNC))
|
||||
? punch_hole(off, len)
|
||||
: DB_SUCCESS;
|
||||
}
|
||||
|
||||
private:
|
||||
/** Page to be written on write operation. */
|
||||
buf_page_t* const m_bpage= nullptr;
|
||||
/** Free storage space associated with a section of the file.
|
||||
@param off byte offset from the start (SEEK_SET)
|
||||
@param len size of the hole in bytes
|
||||
@return DB_SUCCESS or error code */
|
||||
dberr_t punch_hole(os_offset_t off, ulint len) const
|
||||
MY_ATTRIBUTE((nonnull));
|
||||
|
||||
/** File node */
|
||||
fil_node_t* m_fil_node= nullptr;
|
||||
public:
|
||||
/** Page to be written on write operation */
|
||||
buf_page_t* const bpage= nullptr;
|
||||
|
||||
/** File descriptor */
|
||||
const fil_node_t *const node= nullptr;
|
||||
|
||||
/** Request type bit flags */
|
||||
const uint16_t m_type;
|
||||
|
||||
/** for writes, type of page flush */
|
||||
const bool m_LRU= false;
|
||||
const Type type;
|
||||
};
|
||||
|
||||
constexpr IORequest IORequestRead(IORequest::READ);
|
||||
constexpr IORequest IORequestWrite(IORequest::WRITE);
|
||||
|
||||
constexpr IORequest IORequestRead(IORequest::READ_SYNC);
|
||||
constexpr IORequest IORequestReadPartial(IORequest::READ_MAYBE_PARTIAL);
|
||||
constexpr IORequest IORequestWrite(IORequest::WRITE_SYNC);
|
||||
|
||||
/** Sparse file size information. */
|
||||
struct os_file_size_t {
|
||||
@@ -313,20 +266,6 @@ struct os_file_size_t {
|
||||
/** Win NT does not allow more than 64 */
|
||||
static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 256;
|
||||
|
||||
/** Modes for aio operations @{ */
|
||||
/** Normal asynchronous i/o not for ibuf pages or ibuf bitmap pages */
|
||||
static const ulint OS_AIO_NORMAL = 21;
|
||||
|
||||
/** Asynchronous i/o for ibuf pages or ibuf bitmap pages */
|
||||
static const ulint OS_AIO_IBUF = 22;
|
||||
|
||||
/**Calling thread will wait for the i/o to complete,
|
||||
and perform IO completion routine itself;
|
||||
can be used for any pages, ibuf or non-ibuf. This is used to save
|
||||
CPU time, as we can do with fewer thread switches. */
|
||||
static const ulint OS_AIO_SYNC = 24;
|
||||
/* @} */
|
||||
|
||||
extern ulint os_n_file_reads;
|
||||
extern ulint os_n_file_writes;
|
||||
extern ulint os_n_fsyncs;
|
||||
@@ -669,9 +608,9 @@ The wrapper functions have the prefix of "innodb_". */
|
||||
# define os_file_close(file) \
|
||||
pfs_os_file_close_func(file, __FILE__, __LINE__)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, \
|
||||
# define os_aio(type, name, file, buf, offset, \
|
||||
n, read_only, message1, message2) \
|
||||
pfs_os_aio_func(type, mode, name, file, buf, offset, \
|
||||
pfs_os_aio_func(type, name, file, buf, offset, \
|
||||
n, read_only, message1, message2, \
|
||||
__FILE__, __LINE__)
|
||||
|
||||
@@ -859,7 +798,6 @@ function!
|
||||
Performance schema wrapper function of os_aio() which requests
|
||||
an asynchronous I/O operation.
|
||||
@param[in,out] type IO request context
|
||||
@param[in] mode IO mode
|
||||
@param[in] name Name of the file or path as NUL terminated
|
||||
string
|
||||
@param[in] file Open file handle
|
||||
@@ -879,8 +817,7 @@ an asynchronous I/O operation.
|
||||
UNIV_INLINE
|
||||
dberr_t
|
||||
pfs_os_aio_func(
|
||||
IORequest& type,
|
||||
ulint mode,
|
||||
const IORequest&type,
|
||||
const char* name,
|
||||
pfs_os_file_t file,
|
||||
void* buf,
|
||||
@@ -1013,9 +950,9 @@ to original un-instrumented file I/O APIs */
|
||||
|
||||
# define os_file_close(file) os_file_close_func(file)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, \
|
||||
# define os_aio(type, name, file, buf, offset, \
|
||||
n, read_only, message1, message2) \
|
||||
os_aio_func(type, mode, name, file, buf, offset, \
|
||||
os_aio_func(type, name, file, buf, offset, \
|
||||
n, read_only, message1, message2)
|
||||
|
||||
# define os_file_read(type, file, buf, offset, n) \
|
||||
@@ -1281,7 +1218,6 @@ struct os_aio_userdata_t
|
||||
NOTE! Use the corresponding macro os_aio(), not directly this function!
|
||||
Requests an asynchronous i/o operation.
|
||||
@param[in,out] type IO request context
|
||||
@param[in] mode IO mode
|
||||
@param[in] name Name of the file or path as NUL terminated
|
||||
string
|
||||
@param[in] file Open file handle
|
||||
@@ -1298,8 +1234,7 @@ Requests an asynchronous i/o operation.
|
||||
@return DB_SUCCESS or error code */
|
||||
dberr_t
|
||||
os_aio_func(
|
||||
IORequest& type,
|
||||
ulint mode,
|
||||
const IORequest&type,
|
||||
const char* name,
|
||||
pfs_os_file_t file,
|
||||
void* buf,
|
||||
|
@@ -206,7 +206,6 @@ function!
|
||||
Performance schema wrapper function of os_aio() which requests
|
||||
an asynchronous i/o operation.
|
||||
@param[in,out] type IO request context
|
||||
@param[in] mode IO mode
|
||||
@param[in] name Name of the file or path as NUL terminated
|
||||
string
|
||||
@param[in] file Open file handle
|
||||
@@ -226,8 +225,7 @@ an asynchronous i/o operation.
|
||||
UNIV_INLINE
|
||||
dberr_t
|
||||
pfs_os_aio_func(
|
||||
IORequest& type,
|
||||
ulint mode,
|
||||
const IORequest&type,
|
||||
const char* name,
|
||||
pfs_os_file_t file,
|
||||
void* buf,
|
||||
@@ -242,8 +240,6 @@ pfs_os_aio_func(
|
||||
PSI_file_locker_state state;
|
||||
struct PSI_file_locker* locker = NULL;
|
||||
|
||||
ut_ad(type.validate());
|
||||
|
||||
/* Register the read or write I/O depending on "type" */
|
||||
register_pfs_file_io_begin(
|
||||
&state, locker, file, n,
|
||||
@@ -251,7 +247,7 @@ pfs_os_aio_func(
|
||||
src_file, src_line);
|
||||
|
||||
dberr_t result = os_aio_func(
|
||||
type, mode, name, file, buf, offset, n, read_only, m1, m2);
|
||||
type, name, file, buf, offset, n, read_only, m1, m2);
|
||||
|
||||
register_pfs_file_io_end(locker, n);
|
||||
|
||||
@@ -284,8 +280,6 @@ pfs_os_file_read_func(
|
||||
PSI_file_locker_state state;
|
||||
struct PSI_file_locker* locker = NULL;
|
||||
|
||||
ut_ad(type.validate());
|
||||
|
||||
register_pfs_file_io_begin(
|
||||
&state, locker, file, n, PSI_FILE_READ, src_file, src_line);
|
||||
|
||||
|
@@ -46,10 +46,9 @@ Created 3/26/1996 Heikki Tuuri
|
||||
/** Checks if a page address is the trx sys header page.
|
||||
@param[in] page_id page id
|
||||
@return true if trx sys header page */
|
||||
inline bool trx_sys_hdr_page(const page_id_t& page_id)
|
||||
inline bool trx_sys_hdr_page(const page_id_t page_id)
|
||||
{
|
||||
return(page_id.space() == TRX_SYS_SPACE
|
||||
&& page_id.page_no() == TRX_SYS_PAGE_NO);
|
||||
return page_id == page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
@@ -2060,7 +2060,14 @@ same_page:
|
||||
const bool is_init= (b & 0x70) <= INIT_PAGE;
|
||||
switch (*store) {
|
||||
case STORE_IF_EXISTS:
|
||||
if (!fil_space_get_size(space_id))
|
||||
if (fil_space_t *space= fil_space_acquire_silent(space_id))
|
||||
{
|
||||
const auto size= space->get_size();
|
||||
space->release();
|
||||
if (!size)
|
||||
continue;
|
||||
}
|
||||
else
|
||||
continue;
|
||||
/* fall through */
|
||||
case STORE_YES:
|
||||
@@ -2487,7 +2494,7 @@ static void recv_read_in_area(page_id_t page_id)
|
||||
|
||||
if (p != page_nos) {
|
||||
mutex_exit(&recv_sys.mutex);
|
||||
buf_read_recv_pages(FALSE, page_id.space(), page_nos,
|
||||
buf_read_recv_pages(page_id.space(), page_nos,
|
||||
ulint(p - page_nos));
|
||||
mutex_enter(&recv_sys.mutex);
|
||||
}
|
||||
@@ -2513,7 +2520,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
|
||||
if (end_lsn < i.lsn)
|
||||
DBUG_LOG("ib_log", "skip log for page " << page_id
|
||||
<< " LSN " << end_lsn << " < " << i.lsn);
|
||||
else if (fil_space_t *space= fil_space_acquire_for_io(page_id.space()))
|
||||
else if (fil_space_t *space= fil_space_t::get_for_io(page_id.space()))
|
||||
{
|
||||
mtr.start();
|
||||
mtr.set_log_mode(MTR_LOG_NO_REDO);
|
||||
|
@@ -214,7 +214,7 @@ static void memo_slot_release(mtr_memo_slot_t *slot)
|
||||
case MTR_MEMO_SPACE_X_LOCK:
|
||||
{
|
||||
fil_space_t *space= static_cast<fil_space_t*>(slot->object);
|
||||
space->committed_size= space->size;
|
||||
space->set_committed_size();
|
||||
rw_lock_x_unlock(&space->latch);
|
||||
}
|
||||
break;
|
||||
@@ -256,7 +256,7 @@ struct ReleaseLatches {
|
||||
case MTR_MEMO_SPACE_X_LOCK:
|
||||
{
|
||||
fil_space_t *space= static_cast<fil_space_t*>(slot->object);
|
||||
space->committed_size= space->size;
|
||||
space->set_committed_size();
|
||||
rw_lock_x_unlock(&space->latch);
|
||||
}
|
||||
break;
|
||||
|
@@ -135,7 +135,6 @@ public:
|
||||
|
||||
static io_slots *read_slots;
|
||||
static io_slots *write_slots;
|
||||
static io_slots *ibuf_slots;
|
||||
|
||||
/** Number of retries for partial I/O's */
|
||||
constexpr ulint NUM_RETRIES_ON_PARTIAL_IO = 10;
|
||||
@@ -3143,14 +3142,7 @@ os_file_io(
|
||||
|
||||
bytes_returned += n_bytes;
|
||||
|
||||
if (offset > 0
|
||||
&& type.is_write()
|
||||
&& type.punch_hole()) {
|
||||
*err = type.punch_hole(file, offset, n);
|
||||
|
||||
} else {
|
||||
*err = DB_SUCCESS;
|
||||
}
|
||||
*err = type.maybe_punch_hole(offset, n);
|
||||
|
||||
return(original_n);
|
||||
}
|
||||
@@ -3161,8 +3153,7 @@ os_file_io(
|
||||
|
||||
bytes_returned += n_bytes;
|
||||
|
||||
if (!type.is_partial_io_warning_disabled()) {
|
||||
|
||||
if (type.type != IORequest::READ_MAYBE_PARTIAL) {
|
||||
const char* op = type.is_read()
|
||||
? "read" : "written";
|
||||
|
||||
@@ -3180,7 +3171,7 @@ os_file_io(
|
||||
|
||||
*err = DB_IO_ERROR;
|
||||
|
||||
if (!type.is_partial_io_warning_disabled()) {
|
||||
if (type.type != IORequest::READ_MAYBE_PARTIAL) {
|
||||
ib::warn()
|
||||
<< "Retry attempts for "
|
||||
<< (type.is_read() ? "reading" : "writing")
|
||||
@@ -3208,7 +3199,6 @@ os_file_pwrite(
|
||||
os_offset_t offset,
|
||||
dberr_t* err)
|
||||
{
|
||||
ut_ad(type.validate());
|
||||
ut_ad(type.is_write());
|
||||
|
||||
++os_n_file_writes;
|
||||
@@ -3242,7 +3232,6 @@ os_file_write_func(
|
||||
{
|
||||
dberr_t err;
|
||||
|
||||
ut_ad(type.validate());
|
||||
ut_ad(n > 0);
|
||||
|
||||
WAIT_ALLOW_WRITES();
|
||||
@@ -3332,7 +3321,6 @@ os_file_read_page(
|
||||
|
||||
os_bytes_read_since_printout += n;
|
||||
|
||||
ut_ad(type.validate());
|
||||
ut_ad(n > 0);
|
||||
|
||||
ssize_t n_bytes = os_file_pread(type, file, buf, n, offset, &err);
|
||||
@@ -3657,13 +3645,9 @@ fallback:
|
||||
n_bytes = buf_size;
|
||||
}
|
||||
|
||||
dberr_t err;
|
||||
IORequest request(IORequest::WRITE);
|
||||
|
||||
err = os_file_write(
|
||||
request, name, file, buf, current_size, n_bytes);
|
||||
|
||||
if (err != DB_SUCCESS) {
|
||||
if (os_file_write(IORequestWrite, name,
|
||||
file, buf, current_size, n_bytes) !=
|
||||
DB_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -3786,18 +3770,11 @@ os_file_punch_hole(
|
||||
#endif /* _WIN32 */
|
||||
}
|
||||
|
||||
inline bool IORequest::should_punch_hole() const
|
||||
{
|
||||
return m_fil_node && m_fil_node->space->punch_hole;
|
||||
}
|
||||
|
||||
/** Free storage space associated with a section of the file.
|
||||
@param[in] fh Open file handle
|
||||
@param[in] off Starting offset (SEEK_SET)
|
||||
@param[in] len Size of the hole
|
||||
@param off byte offset from the start (SEEK_SET)
|
||||
@param len size of the hole in bytes
|
||||
@return DB_SUCCESS or error code */
|
||||
dberr_t
|
||||
IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
|
||||
dberr_t IORequest::punch_hole(os_offset_t off, ulint len) const
|
||||
{
|
||||
/* In this debugging mode, we act as if punch hole is supported,
|
||||
and then skip any calls to actually punch a hole here.
|
||||
@@ -3806,7 +3783,7 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
|
||||
return(DB_SUCCESS);
|
||||
);
|
||||
|
||||
ulint trim_len = get_trim_length(len);
|
||||
ulint trim_len = bpage ? bpage->physical_size() - len : 0;
|
||||
|
||||
if (trim_len == 0) {
|
||||
return(DB_SUCCESS);
|
||||
@@ -3816,11 +3793,11 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
|
||||
|
||||
/* Check whether the file system supports punching holes for this
|
||||
tablespace. */
|
||||
if (!should_punch_hole()) {
|
||||
if (!node->space->punch_hole) {
|
||||
return DB_IO_NO_PUNCH_HOLE;
|
||||
}
|
||||
|
||||
dberr_t err = os_file_punch_hole(fh, off, trim_len);
|
||||
dberr_t err = os_file_punch_hole(node->handle, off, trim_len);
|
||||
|
||||
if (err == DB_SUCCESS) {
|
||||
srv_stats.page_compressed_trim_op.inc();
|
||||
@@ -3828,7 +3805,7 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
|
||||
/* If punch hole is not supported,
|
||||
set space so that it is not used. */
|
||||
if (err == DB_IO_NO_PUNCH_HOLE) {
|
||||
m_fil_node->space->punch_hole = false;
|
||||
node->space->punch_hole = false;
|
||||
err = DB_SUCCESS;
|
||||
}
|
||||
}
|
||||
@@ -3885,12 +3862,8 @@ static void io_callback(tpool::aiocb* cb)
|
||||
os_aio_userdata_t data(cb->m_userdata);
|
||||
/* Return cb back to cache*/
|
||||
if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD) {
|
||||
if (read_slots->contains(cb)) {
|
||||
ut_ad(read_slots->contains(cb));
|
||||
read_slots->release(cb);
|
||||
} else {
|
||||
ut_ad(ibuf_slots->contains(cb));
|
||||
ibuf_slots->release(cb);
|
||||
}
|
||||
} else {
|
||||
ut_ad(write_slots->contains(cb));
|
||||
write_slots->release(cb);
|
||||
@@ -4033,8 +4006,7 @@ bool os_aio_init(ulint n_reader_threads, ulint n_writer_threads, ulint)
|
||||
{
|
||||
int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
|
||||
int max_read_events= int(n_reader_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
|
||||
int max_ibuf_events = 1 * OS_AIO_N_PENDING_IOS_PER_THREAD;
|
||||
int max_events = max_read_events + max_write_events + max_ibuf_events;
|
||||
int max_events = max_read_events + max_write_events;
|
||||
int ret;
|
||||
|
||||
#if LINUX_NATIVE_AIO
|
||||
@@ -4053,7 +4025,6 @@ bool os_aio_init(ulint n_reader_threads, ulint n_writer_threads, ulint)
|
||||
}
|
||||
read_slots = new io_slots(max_read_events, (uint)n_reader_threads);
|
||||
write_slots = new io_slots(max_write_events, (uint)n_writer_threads);
|
||||
ibuf_slots = new io_slots(max_ibuf_events, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4062,10 +4033,8 @@ void os_aio_free()
|
||||
srv_thread_pool->disable_aio();
|
||||
delete read_slots;
|
||||
delete write_slots;
|
||||
delete ibuf_slots;
|
||||
read_slots= nullptr;
|
||||
write_slots= nullptr;
|
||||
ibuf_slots= nullptr;
|
||||
}
|
||||
|
||||
/** Waits until there are no pending writes. There can
|
||||
@@ -4088,7 +4057,6 @@ void os_aio_wait_until_no_pending_writes()
|
||||
NOTE! Use the corresponding macro os_aio(), not directly this function!
|
||||
Requests an asynchronous i/o operation.
|
||||
@param[in,out] type IO request context
|
||||
@param[in] mode IO mode
|
||||
@param[in] name Name of the file or path as NUL terminated
|
||||
string
|
||||
@param[in] file Open file handle
|
||||
@@ -4106,8 +4074,7 @@ Requests an asynchronous i/o operation.
|
||||
@return DB_SUCCESS or error code */
|
||||
dberr_t
|
||||
os_aio_func(
|
||||
IORequest& type,
|
||||
ulint mode,
|
||||
const IORequest&type,
|
||||
const char* name,
|
||||
pfs_os_file_t file,
|
||||
void* buf,
|
||||
@@ -4126,10 +4093,7 @@ os_aio_func(
|
||||
ut_ad((n & 0xFFFFFFFFUL) == n);
|
||||
#endif /* WIN_ASYNC_IO */
|
||||
|
||||
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
|
||||
mode = OS_AIO_SYNC; os_has_said_disk_full = FALSE;);
|
||||
|
||||
if (mode == OS_AIO_SYNC) {
|
||||
if (!type.is_async()) {
|
||||
if (type.is_read()) {
|
||||
return(os_file_read_func(type, file, buf, offset, n));
|
||||
}
|
||||
@@ -4141,20 +4105,14 @@ os_aio_func(
|
||||
|
||||
if (type.is_read()) {
|
||||
++os_n_file_reads;
|
||||
} else if (type.is_write()) {
|
||||
++os_n_file_writes;
|
||||
} else {
|
||||
ut_error;
|
||||
ut_ad(type.is_write());
|
||||
++os_n_file_writes;
|
||||
}
|
||||
|
||||
compile_time_assert(sizeof(os_aio_userdata_t) <= tpool::MAX_AIO_USERDATA_LEN);
|
||||
os_aio_userdata_t userdata{m1,type,m2};
|
||||
io_slots* slots;
|
||||
if (type.is_read()) {
|
||||
slots = mode == OS_AIO_IBUF?ibuf_slots: read_slots;
|
||||
} else {
|
||||
slots = write_slots;
|
||||
}
|
||||
io_slots* slots= type.is_read() ? read_slots : write_slots;
|
||||
tpool::aiocb* cb = slots->acquire();
|
||||
|
||||
cb->m_buffer = buf;
|
||||
@@ -4462,12 +4420,11 @@ void fil_node_t::find_metadata(os_file_t file
|
||||
}
|
||||
|
||||
/** Read the first page of a data file.
|
||||
@param[in] first whether this is the very first read
|
||||
@return whether the page was found valid */
|
||||
bool fil_node_t::read_page0(bool first)
|
||||
bool fil_node_t::read_page0()
|
||||
{
|
||||
ut_ad(mutex_own(&fil_system.mutex));
|
||||
const ulint psize = space->physical_size();
|
||||
const unsigned psize = space->physical_size();
|
||||
#ifndef _WIN32
|
||||
struct stat statbuf;
|
||||
if (fstat(handle, &statbuf)) {
|
||||
@@ -4479,7 +4436,7 @@ bool fil_node_t::read_page0(bool first)
|
||||
os_offset_t size_bytes = os_file_get_size(handle);
|
||||
ut_a(size_bytes != (os_offset_t) -1);
|
||||
#endif
|
||||
const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
|
||||
const uint32_t min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
|
||||
|
||||
if (size_bytes < min_size) {
|
||||
ib::error() << "The size of the file " << name
|
||||
@@ -4546,14 +4503,11 @@ invalid:
|
||||
return false;
|
||||
}
|
||||
|
||||
if (first) {
|
||||
ut_ad(space->id != TRX_SYS_SPACE);
|
||||
#ifdef UNIV_LINUX
|
||||
find_metadata(handle, &statbuf);
|
||||
#else
|
||||
find_metadata();
|
||||
#endif
|
||||
|
||||
/* Truncate the size to a multiple of extent size. */
|
||||
ulint mask = psize * FSP_EXTENT_SIZE - 1;
|
||||
|
||||
@@ -4568,19 +4522,7 @@ invalid:
|
||||
|
||||
space->punch_hole = space->is_compressed();
|
||||
this->size = uint32_t(size_bytes / psize);
|
||||
space->committed_size = space->size += this->size;
|
||||
} else if (space->id != TRX_SYS_SPACE || space->size_in_header) {
|
||||
/* If this is not the first-time open, do nothing.
|
||||
For the system tablespace, we always get invoked as
|
||||
first=false, so we detect the true first-time-open based
|
||||
on size_in_header and proceed to initialize the data. */
|
||||
return true;
|
||||
} else {
|
||||
/* Initialize the size of predefined tablespaces
|
||||
to FSP_SIZE. */
|
||||
space->committed_size = size;
|
||||
}
|
||||
|
||||
space->set_sizes(this->size);
|
||||
ut_ad(space->free_limit == 0 || space->free_limit == free_limit);
|
||||
ut_ad(space->free_len == 0 || space->free_len == free_len);
|
||||
space->size_in_header = size;
|
||||
|
@@ -3424,8 +3424,7 @@ fil_iterate(
|
||||
byte* const writeptr = readptr;
|
||||
|
||||
err = os_file_read_no_error_handling(
|
||||
IORequest(IORequest::READ
|
||||
| IORequest::DISABLE_PARTIAL_IO_WARNINGS),
|
||||
IORequestReadPartial,
|
||||
iter.file, readptr, offset, n_bytes, 0);
|
||||
if (err != DB_SUCCESS) {
|
||||
ib::error() << iter.filepath
|
||||
@@ -3664,9 +3663,7 @@ not_encrypted:
|
||||
|
||||
/* A page was updated in the set, write back to disk. */
|
||||
if (updated) {
|
||||
IORequest write_request(IORequest::WRITE);
|
||||
|
||||
err = os_file_write(write_request,
|
||||
err = os_file_write(IORequestWrite,
|
||||
iter.filepath, iter.file,
|
||||
writeptr, offset, n_bytes);
|
||||
|
||||
@@ -3759,9 +3756,7 @@ fil_tablespace_iterate(
|
||||
|
||||
/* Read the first page and determine the page and zip size. */
|
||||
|
||||
err = os_file_read_no_error_handling(
|
||||
IORequest(IORequest::READ
|
||||
| IORequest::DISABLE_PARTIAL_IO_WARNINGS),
|
||||
err = os_file_read_no_error_handling(IORequestReadPartial,
|
||||
file, page, 0, srv_page_size, 0);
|
||||
|
||||
if (err == DB_SUCCESS) {
|
||||
|
@@ -545,7 +545,7 @@ row_quiesce_table_start(
|
||||
if (!trx_is_interrupted(trx)) {
|
||||
/* Ensure that all asynchronous IO is completed. */
|
||||
os_aio_wait_until_no_pending_writes();
|
||||
fil_flush(table->space_id);
|
||||
table->space->flush();
|
||||
|
||||
if (row_quiesce_write_cfg(table, trx->mysql_thd)
|
||||
!= DB_SUCCESS) {
|
||||
|
@@ -229,10 +229,12 @@ srv_file_check_mode(
|
||||
static const char INIT_LOG_FILE0[]= "101";
|
||||
|
||||
/** Creates log file.
|
||||
@param[in] create_new_db whether the database is being initialized
|
||||
@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value
|
||||
@param[out] logfile0 name of the log file
|
||||
@return DB_SUCCESS or error code */
|
||||
static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
|
||||
static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
|
||||
std::string& logfile0)
|
||||
{
|
||||
if (srv_read_only_mode) {
|
||||
ib::error() << "Cannot create log file in read-only mode";
|
||||
@@ -296,7 +298,9 @@ static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
|
||||
}
|
||||
|
||||
log_sys.log.open_file(logfile0);
|
||||
fil_open_system_tablespace_files();
|
||||
if (!fil_system.sys_space->open(create_new_db)) {
|
||||
return DB_ERROR;
|
||||
}
|
||||
|
||||
/* Create a log checkpoint. */
|
||||
log_mutex_enter();
|
||||
@@ -553,7 +557,7 @@ err_exit:
|
||||
|
||||
fil_set_max_space_id_if_bigger(space_id);
|
||||
|
||||
fil_space_t *space= fil_space_create(undo_name, space_id, fsp_flags,
|
||||
fil_space_t *space= fil_space_t::create(undo_name, space_id, fsp_flags,
|
||||
FIL_TYPE_TABLESPACE, NULL);
|
||||
ut_a(fil_validate());
|
||||
ut_a(space);
|
||||
@@ -563,21 +567,16 @@ err_exit:
|
||||
|
||||
if (create)
|
||||
{
|
||||
space->set_sizes(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
|
||||
space->size= file->size= uint32_t(size >> srv_page_size_shift);
|
||||
space->size_in_header= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
|
||||
space->committed_size= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
|
||||
}
|
||||
else
|
||||
{
|
||||
success= file->read_page0(true);
|
||||
if (!success)
|
||||
else if (!file->read_page0())
|
||||
{
|
||||
os_file_close(file->handle);
|
||||
file->handle= OS_FILE_CLOSED;
|
||||
ut_a(fil_system.n_open > 0);
|
||||
fil_system.n_open--;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&fil_system.mutex);
|
||||
return space_id;
|
||||
@@ -803,7 +802,7 @@ srv_open_tmp_tablespace(bool create_new_db)
|
||||
true, create_new_db, &sum_of_new_sizes, NULL))
|
||||
!= DB_SUCCESS) {
|
||||
ib::error() << "Unable to create the shared innodb_temporary";
|
||||
} else if (fil_system.temp_space->open()) {
|
||||
} else if (fil_system.temp_space->open(true)) {
|
||||
/* Initialize the header page */
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
@@ -1304,7 +1303,7 @@ dberr_t srv_start(bool create_new_db)
|
||||
log_sys.set_flushed_lsn(flushed_lsn);
|
||||
buf_flush_sync();
|
||||
|
||||
err = create_log_file(flushed_lsn, logfile0);
|
||||
err = create_log_file(true, flushed_lsn, logfile0);
|
||||
|
||||
if (err != DB_SUCCESS) {
|
||||
return(srv_init_abort(err));
|
||||
@@ -1333,7 +1332,7 @@ dberr_t srv_start(bool create_new_db)
|
||||
|
||||
srv_log_file_size = srv_log_file_size_requested;
|
||||
|
||||
err = create_log_file(flushed_lsn, logfile0);
|
||||
err = create_log_file(false, flushed_lsn, logfile0);
|
||||
|
||||
if (err == DB_SUCCESS) {
|
||||
err = create_log_file_rename(flushed_lsn,
|
||||
@@ -1364,11 +1363,11 @@ dberr_t srv_start(bool create_new_db)
|
||||
file_checked:
|
||||
/* Open log file and data files in the system tablespace: we keep
|
||||
them open until database shutdown */
|
||||
|
||||
fil_open_system_tablespace_files();
|
||||
ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug);
|
||||
|
||||
err = srv_undo_tablespaces_init(create_new_db);
|
||||
err = fil_system.sys_space->open(create_new_db)
|
||||
? srv_undo_tablespaces_init(create_new_db)
|
||||
: DB_ERROR;
|
||||
|
||||
/* If the force recovery is set very high then we carry on regardless
|
||||
of all errors. Basically this is fingers crossed mode. */
|
||||
@@ -1673,7 +1672,7 @@ file_checked:
|
||||
|
||||
srv_log_file_size = srv_log_file_size_requested;
|
||||
|
||||
err = create_log_file(flushed_lsn, logfile0);
|
||||
err = create_log_file(false, flushed_lsn, logfile0);
|
||||
|
||||
if (err == DB_SUCCESS) {
|
||||
err = create_log_file_rename(flushed_lsn,
|
||||
|
@@ -584,11 +584,10 @@ static void trx_purge_truncate_history()
|
||||
: 0, j = i;; ) {
|
||||
ulint space_id = srv_undo_space_id_start + i;
|
||||
ut_ad(srv_is_undo_tablespace(space_id));
|
||||
fil_space_t* space= fil_space_get(space_id);
|
||||
|
||||
if (fil_space_get_size(space_id)
|
||||
> threshold) {
|
||||
purge_sys.truncate.current
|
||||
= fil_space_get(space_id);
|
||||
if (space && space->get_size() > threshold) {
|
||||
purge_sys.truncate.current = space;
|
||||
break;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user