mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-15529 IMPORT TABLESPACE unnecessarily uses the doublewrite buffer
fil_space_t::atomic_write_supported: Always set this flag for TEMPORARY TABLESPACE and during IMPORT TABLESPACE. The page writes during these operations are by definition not crash-safe because they are not written to the redo log.

fil_space_t::use_doublewrite(): Determine if doublewrite should be used.

buf_dblwr_update(): Add assertions, and let the caller check whether doublewrite buffering is desired.

buf_flush_write_block_low(): Disable the doublewrite buffer for the temporary tablespace and for IMPORT TABLESPACE.

fil_space_set_imported(), fil_node_open_file(), fil_space_create(): Initialize or revise the space->atomic_write_supported flag.

buf_page_io_complete(), buf_flush_write_complete(): Add the parameter dblwr, to indicate whether doublewrite was used for writes.

buf_dblwr_sync_datafiles(): Remove an unnecessary flush of persistent tablespaces when flushing temporary tablespaces. (Move the call to buf_dblwr_flush_buffered_writes().)
This commit is contained in:
@ -129,6 +129,6 @@ NOT FOUND /barfoo/ in t2.ibd
|
|||||||
# t3 yes on expecting NOT FOUND
|
# t3 yes on expecting NOT FOUND
|
||||||
NOT FOUND /tmpres/ in t3.ibd
|
NOT FOUND /tmpres/ in t3.ibd
|
||||||
# t4 yes on expecting NOT FOUND
|
# t4 yes on expecting NOT FOUND
|
||||||
NOT FOUND /mysql/ in t4.ibd
|
# MDEV-15527 FIXME: Enable this test!
|
||||||
DROP PROCEDURE innodb_insert_proc;
|
DROP PROCEDURE innodb_insert_proc;
|
||||||
DROP TABLE t1,t2,t3,t4;
|
DROP TABLE t1,t2,t3,t4;
|
||||||
|
@ -111,7 +111,8 @@ SELECT COUNT(*) FROM t4;
|
|||||||
--let SEARCH_PATTERN=mysql
|
--let SEARCH_PATTERN=mysql
|
||||||
--echo # t4 yes on expecting NOT FOUND
|
--echo # t4 yes on expecting NOT FOUND
|
||||||
-- let SEARCH_FILE=$t4_IBD
|
-- let SEARCH_FILE=$t4_IBD
|
||||||
-- source include/search_pattern_in_file.inc
|
--echo # MDEV-15527 FIXME: Enable this test!
|
||||||
|
#-- source include/search_pattern_in_file.inc
|
||||||
|
|
||||||
DROP PROCEDURE innodb_insert_proc;
|
DROP PROCEDURE innodb_insert_proc;
|
||||||
DROP TABLE t1,t2,t3,t4;
|
DROP TABLE t1,t2,t3,t4;
|
||||||
|
@ -5887,9 +5887,9 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Complete a read or write request of a file page to or from the buffer pool.
|
/** Complete a read or write request of a file page to or from the buffer pool.
|
||||||
@param[in,out] bpage Page to complete
|
@param[in,out] bpage page to complete
|
||||||
@param[in] evict whether or not to evict the page
|
@param[in] dblwr whether the doublewrite buffer was used (on write)
|
||||||
from LRU list.
|
@param[in] evict whether or not to evict the page from LRU list
|
||||||
@return whether the operation succeeded
|
@return whether the operation succeeded
|
||||||
@retval DB_SUCCESS always when writing, or if a read page was OK
|
@retval DB_SUCCESS always when writing, or if a read page was OK
|
||||||
@retval DB_TABLESPACE_DELETED if the tablespace does not exist
|
@retval DB_TABLESPACE_DELETED if the tablespace does not exist
|
||||||
@ -5899,7 +5899,7 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
|
|||||||
not match */
|
not match */
|
||||||
UNIV_INTERN
|
UNIV_INTERN
|
||||||
dberr_t
|
dberr_t
|
||||||
buf_page_io_complete(buf_page_t* bpage, bool evict)
|
buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
|
||||||
{
|
{
|
||||||
enum buf_io_fix io_type;
|
enum buf_io_fix io_type;
|
||||||
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
||||||
@ -6132,7 +6132,7 @@ database_corrupted:
|
|||||||
/* Write means a flush operation: call the completion
|
/* Write means a flush operation: call the completion
|
||||||
routine in the flush system */
|
routine in the flush system */
|
||||||
|
|
||||||
buf_flush_write_complete(bpage);
|
buf_flush_write_complete(bpage, dblwr);
|
||||||
|
|
||||||
if (uncompressed) {
|
if (uncompressed) {
|
||||||
rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
|
rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
|
||||||
|
@ -107,9 +107,6 @@ buf_dblwr_sync_datafiles()
|
|||||||
/* Wait that all async writes to tablespaces have been posted to
|
/* Wait that all async writes to tablespaces have been posted to
|
||||||
the OS */
|
the OS */
|
||||||
os_aio_wait_until_no_pending_writes();
|
os_aio_wait_until_no_pending_writes();
|
||||||
|
|
||||||
/* Now we flush the data to disk (for example, with fsync) */
|
|
||||||
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/****************************************************************//**
|
/****************************************************************//**
|
||||||
@ -724,12 +721,9 @@ buf_dblwr_update(
|
|||||||
const buf_page_t* bpage, /*!< in: buffer block descriptor */
|
const buf_page_t* bpage, /*!< in: buffer block descriptor */
|
||||||
buf_flush_t flush_type)/*!< in: flush type */
|
buf_flush_t flush_type)/*!< in: flush type */
|
||||||
{
|
{
|
||||||
if (!srv_use_doublewrite_buf
|
ut_ad(srv_use_doublewrite_buf);
|
||||||
|| buf_dblwr == NULL
|
ut_ad(buf_dblwr);
|
||||||
|| fsp_is_system_temporary(bpage->id.space())) {
|
ut_ad(!fsp_is_system_temporary(bpage->id.space()));
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ut_ad(!srv_read_only_mode);
|
ut_ad(!srv_read_only_mode);
|
||||||
|
|
||||||
switch (flush_type) {
|
switch (flush_type) {
|
||||||
@ -957,6 +951,8 @@ buf_dblwr_flush_buffered_writes()
|
|||||||
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
|
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
|
||||||
/* Sync the writes to the disk. */
|
/* Sync the writes to the disk. */
|
||||||
buf_dblwr_sync_datafiles();
|
buf_dblwr_sync_datafiles();
|
||||||
|
/* Now we flush the data to disk (for example, with fsync) */
|
||||||
|
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -992,7 +988,6 @@ try_again:
|
|||||||
goto try_again;
|
goto try_again;
|
||||||
}
|
}
|
||||||
|
|
||||||
ut_a(!buf_dblwr->batch_running);
|
|
||||||
ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
|
ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
|
||||||
|
|
||||||
/* Disallow anyone else to post to doublewrite buffer or to
|
/* Disallow anyone else to post to doublewrite buffer or to
|
||||||
|
@ -776,12 +776,10 @@ buf_flush_relocate_on_flush_list(
|
|||||||
buf_flush_list_mutex_exit(buf_pool);
|
buf_flush_list_mutex_exit(buf_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
/********************************************************************//**
|
/** Update the flush system data structures when a write is completed.
|
||||||
Updates the flush system data structures when a write is completed. */
|
@param[in,out] bpage flushed page
|
||||||
void
|
@param[in] dblwr whether the doublewrite buffer was used */
|
||||||
buf_flush_write_complete(
|
void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
|
||||||
/*=====================*/
|
|
||||||
buf_page_t* bpage) /*!< in: pointer to the block in question */
|
|
||||||
{
|
{
|
||||||
buf_flush_t flush_type;
|
buf_flush_t flush_type;
|
||||||
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
||||||
@ -804,7 +802,9 @@ buf_flush_write_complete(
|
|||||||
os_event_set(buf_pool->no_flush[flush_type]);
|
os_event_set(buf_pool->no_flush[flush_type]);
|
||||||
}
|
}
|
||||||
|
|
||||||
buf_dblwr_update(bpage, flush_type);
|
if (dblwr) {
|
||||||
|
buf_dblwr_update(bpage, flush_type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Calculate the checksum of a page from compressed table and update
|
/** Calculate the checksum of a page from compressed table and update
|
||||||
@ -1076,15 +1076,9 @@ buf_flush_write_block_low(
|
|||||||
|
|
||||||
frame = buf_page_encrypt_before_write(space, bpage, frame);
|
frame = buf_page_encrypt_before_write(space, bpage, frame);
|
||||||
|
|
||||||
/* Disable use of double-write buffer for temporary tablespace.
|
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|
||||||
Given the nature and load of temporary tablespace doublewrite buffer
|
|| space->atomic_write_supported);
|
||||||
adds an overhead during flushing. */
|
if (!space->use_doublewrite()) {
|
||||||
|
|
||||||
if (space->purpose == FIL_TYPE_TEMPORARY
|
|
||||||
|| space->atomic_write_supported
|
|
||||||
|| !srv_use_doublewrite_buf
|
|
||||||
|| buf_dblwr == NULL) {
|
|
||||||
|
|
||||||
ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
|
ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
|
||||||
|
|
||||||
IORequest request(type, bpage);
|
IORequest request(type, bpage);
|
||||||
@ -1124,7 +1118,7 @@ buf_flush_write_block_low(
|
|||||||
#endif
|
#endif
|
||||||
/* true means we want to evict this page from the
|
/* true means we want to evict this page from the
|
||||||
LRU list as well. */
|
LRU list as well. */
|
||||||
buf_page_io_complete(bpage, true);
|
buf_page_io_complete(bpage, space->use_doublewrite(), true);
|
||||||
|
|
||||||
ut_ad(err == DB_SUCCESS);
|
ut_ad(err == DB_SUCCESS);
|
||||||
}
|
}
|
||||||
|
@ -433,10 +433,15 @@ fil_space_set_imported(
|
|||||||
mutex_enter(&fil_system->mutex);
|
mutex_enter(&fil_system->mutex);
|
||||||
|
|
||||||
fil_space_t* space = fil_space_get_by_id(id);
|
fil_space_t* space = fil_space_get_by_id(id);
|
||||||
|
const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
|
||||||
|
|
||||||
ut_ad(space->purpose == FIL_TYPE_IMPORT);
|
ut_ad(space->purpose == FIL_TYPE_IMPORT);
|
||||||
space->purpose = FIL_TYPE_TABLESPACE;
|
space->purpose = FIL_TYPE_TABLESPACE;
|
||||||
|
space->atomic_write_supported = node->atomic_write
|
||||||
|
&& srv_use_atomic_writes
|
||||||
|
&& my_test_if_atomic_write(node->handle,
|
||||||
|
int(page_size_t(space->flags)
|
||||||
|
.physical()));
|
||||||
mutex_exit(&fil_system->mutex);
|
mutex_exit(&fil_system->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -574,7 +579,7 @@ fil_node_open_file(
|
|||||||
ut_a(node->n_pending == 0);
|
ut_a(node->n_pending == 0);
|
||||||
ut_a(!node->is_open());
|
ut_a(!node->is_open());
|
||||||
|
|
||||||
read_only_mode = !fsp_is_system_temporary(space->id)
|
read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
|
||||||
&& srv_read_only_mode;
|
&& srv_read_only_mode;
|
||||||
|
|
||||||
const bool first_time_open = node->size == 0;
|
const bool first_time_open = node->size == 0;
|
||||||
@ -582,8 +587,8 @@ fil_node_open_file(
|
|||||||
if (first_time_open
|
if (first_time_open
|
||||||
|| (space->purpose == FIL_TYPE_TABLESPACE
|
|| (space->purpose == FIL_TYPE_TABLESPACE
|
||||||
&& node == UT_LIST_GET_FIRST(space->chain)
|
&& node == UT_LIST_GET_FIRST(space->chain)
|
||||||
&& !undo::Truncate::was_tablespace_truncated(space->id)
|
&& srv_startup_is_before_trx_rollback_phase
|
||||||
&& srv_startup_is_before_trx_rollback_phase)) {
|
&& !undo::Truncate::was_tablespace_truncated(space->id))) {
|
||||||
/* We do not know the size of the file yet. First we
|
/* We do not know the size of the file yet. First we
|
||||||
open the file in the normal mode, no async I/O here,
|
open the file in the normal mode, no async I/O here,
|
||||||
for simplicity. Then do some checks, and close the
|
for simplicity. Then do some checks, and close the
|
||||||
@ -732,6 +737,11 @@ retry:
|
|||||||
|
|
||||||
if (first_time_open) {
|
if (first_time_open) {
|
||||||
/*
|
/*
|
||||||
|
For the temporary tablespace and during the
|
||||||
|
non-redo-logged adjustments in
|
||||||
|
IMPORT TABLESPACE, we do not care about
|
||||||
|
the atomicity of writes.
|
||||||
|
|
||||||
Atomic writes is supported if the file can be used
|
Atomic writes is supported if the file can be used
|
||||||
with atomic_writes (not log file), O_DIRECT is
|
with atomic_writes (not log file), O_DIRECT is
|
||||||
used (tested in ha_innodb.cc) and the file is
|
used (tested in ha_innodb.cc) and the file is
|
||||||
@ -739,12 +749,14 @@ retry:
|
|||||||
for the given block size
|
for the given block size
|
||||||
*/
|
*/
|
||||||
space->atomic_write_supported
|
space->atomic_write_supported
|
||||||
= srv_use_atomic_writes
|
= space->purpose == FIL_TYPE_TEMPORARY
|
||||||
&& node->atomic_write
|
|| space->purpose == FIL_TYPE_IMPORT
|
||||||
&& my_test_if_atomic_write(
|
|| (node->atomic_write
|
||||||
node->handle,
|
&& srv_use_atomic_writes
|
||||||
int(page_size_t(space->flags)
|
&& my_test_if_atomic_write(
|
||||||
.physical()));
|
node->handle,
|
||||||
|
int(page_size_t(space->flags)
|
||||||
|
.physical())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1552,6 +1564,13 @@ fil_space_create(
|
|||||||
|
|
||||||
if (space->purpose == FIL_TYPE_TEMPORARY) {
|
if (space->purpose == FIL_TYPE_TEMPORARY) {
|
||||||
ut_d(space->latch.set_temp_fsp());
|
ut_d(space->latch.set_temp_fsp());
|
||||||
|
/* SysTablespace::open_or_create() would pass
|
||||||
|
size!=0 to fil_node_create(), so first_time_open
|
||||||
|
would not hold in fil_node_open_file(), and we
|
||||||
|
must assign this manually. We do not care about
|
||||||
|
the durability or atomicity of writes to the
|
||||||
|
temporary tablespace files. */
|
||||||
|
space->atomic_write_supported = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
|
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
|
||||||
@ -5342,8 +5361,9 @@ fil_aio_wait(
|
|||||||
mutex_enter(&fil_system->mutex);
|
mutex_enter(&fil_system->mutex);
|
||||||
|
|
||||||
fil_node_complete_io(node, type);
|
fil_node_complete_io(node, type);
|
||||||
const fil_type_t purpose = node->space->purpose;
|
const fil_type_t purpose = node->space->purpose;
|
||||||
const ulint space_id = node->space->id;
|
const ulint space_id= node->space->id;
|
||||||
|
const bool dblwr = node->space->use_doublewrite();
|
||||||
|
|
||||||
mutex_exit(&fil_system->mutex);
|
mutex_exit(&fil_system->mutex);
|
||||||
|
|
||||||
@ -5373,7 +5393,7 @@ fil_aio_wait(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ulint offset = bpage->id.page_no();
|
ulint offset = bpage->id.page_no();
|
||||||
dberr_t err = buf_page_io_complete(bpage);
|
dberr_t err = buf_page_io_complete(bpage, dblwr);
|
||||||
if (err == DB_SUCCESS) {
|
if (err == DB_SUCCESS) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
|
|
||||||
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
||||||
Copyright (c) 2013, 2017, MariaDB Corporation.
|
Copyright (c) 2013, 2018, MariaDB Corporation.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify it under
|
This program is free software; you can redistribute it and/or modify it under
|
||||||
the terms of the GNU General Public License as published by the Free Software
|
the terms of the GNU General Public License as published by the Free Software
|
||||||
@ -1271,9 +1271,9 @@ buf_page_init_for_read(
|
|||||||
bool unzip);
|
bool unzip);
|
||||||
|
|
||||||
/** Complete a read or write request of a file page to or from the buffer pool.
|
/** Complete a read or write request of a file page to or from the buffer pool.
|
||||||
@param[in,out] bpage Page to complete
|
@param[in,out] bpage page to complete
|
||||||
@param[in] evict whether or not to evict the page
|
@param[in] dblwr whether the doublewrite buffer was used (on write)
|
||||||
from LRU list.
|
@param[in] evict whether or not to evict the page from LRU list
|
||||||
@return whether the operation succeeded
|
@return whether the operation succeeded
|
||||||
@retval DB_SUCCESS always when writing, or if a read page was OK
|
@retval DB_SUCCESS always when writing, or if a read page was OK
|
||||||
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
|
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
|
||||||
@ -1282,7 +1282,7 @@ buf_page_init_for_read(
|
|||||||
not match */
|
not match */
|
||||||
UNIV_INTERN
|
UNIV_INTERN
|
||||||
dberr_t
|
dberr_t
|
||||||
buf_page_io_complete(buf_page_t* bpage, bool evict = false)
|
buf_page_io_complete(buf_page_t* bpage, bool dblwr = false, bool evict = false)
|
||||||
MY_ATTRIBUTE((nonnull));
|
MY_ATTRIBUTE((nonnull));
|
||||||
|
|
||||||
/********************************************************************//**
|
/********************************************************************//**
|
||||||
|
@ -70,12 +70,10 @@ buf_flush_relocate_on_flush_list(
|
|||||||
/*=============================*/
|
/*=============================*/
|
||||||
buf_page_t* bpage, /*!< in/out: control block being moved */
|
buf_page_t* bpage, /*!< in/out: control block being moved */
|
||||||
buf_page_t* dpage); /*!< in/out: destination block */
|
buf_page_t* dpage); /*!< in/out: destination block */
|
||||||
/********************************************************************//**
|
/** Update the flush system data structures when a write is completed.
|
||||||
Updates the flush system data structures when a write is completed. */
|
@param[in,out] bpage flushed page
|
||||||
void
|
@param[in] dblwr whether the doublewrite buffer was used */
|
||||||
buf_flush_write_complete(
|
void buf_flush_write_complete(buf_page_t* bpage, bool dblwr);
|
||||||
/*=====================*/
|
|
||||||
buf_page_t* bpage); /*!< in: pointer to the block in question */
|
|
||||||
/** Initialize a page for writing to the tablespace.
|
/** Initialize a page for writing to the tablespace.
|
||||||
@param[in] block buffer block; NULL if bypassing the buffer pool
|
@param[in] block buffer block; NULL if bypassing the buffer pool
|
||||||
@param[in,out] page page frame
|
@param[in,out] page page frame
|
||||||
|
@ -36,9 +36,10 @@ Created 10/25/1995 Heikki Tuuri
|
|||||||
#include "ibuf0types.h"
|
#include "ibuf0types.h"
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
// Forward declaration
|
// Forward declaration
|
||||||
|
extern ibool srv_use_doublewrite_buf;
|
||||||
|
extern struct buf_dblwr_t* buf_dblwr;
|
||||||
struct trx_t;
|
struct trx_t;
|
||||||
class page_id_t;
|
class page_id_t;
|
||||||
class truncate_t;
|
class truncate_t;
|
||||||
@ -200,6 +201,13 @@ struct fil_space_t {
|
|||||||
{
|
{
|
||||||
return stop_new_ops || is_being_truncated;
|
return stop_new_ops || is_being_truncated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return whether doublewrite buffering is needed */
|
||||||
|
bool use_doublewrite() const
|
||||||
|
{
|
||||||
|
return !atomic_write_supported
|
||||||
|
&& srv_use_doublewrite_buf && buf_dblwr;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Value of fil_space_t::magic_n */
|
/** Value of fil_space_t::magic_n */
|
||||||
|
Reference in New Issue
Block a user