mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-26029: Sparse files are inefficient on thinly provisioned storage
The MariaDB implementation of page_compressed tables for InnoDB used sparse files. In the worst case, in the data file, every data page will consist of some data followed by a hole. This may be extremely inefficient in some file systems.

If the underlying storage device is thinly provisioned (can compress data on the fly), it would be good to write regular files (with sequences of NUL bytes at the end of each page_compressed block) and let the storage device take care of compressing the data.

For reads, sparse file regions and regions containing NUL bytes will be indistinguishable.

my_test_if_thinly_provisioned(): A new predicate for detecting thinly provisioned storage. (Not implemented yet; it is currently a macro that always evaluates to 0.)

innodb_use_atomic_writes: Correct the comment.

buf_flush_page(): Support all values of fil_node_t::punch_hole. On a thinly provisioned storage device, we will always write NUL-padded innodb_page_size bytes also for page_compressed tables.

buf_flush_freed_pages(): Remove a redundant condition.

fil_space_t::atomic_write_supported: Remove. (This was duplicating fil_node_t::atomic_write.)

fil_space_t::punch_hole: Remove. (This was duplicating fil_node_t::punch_hole.)

fil_node_t: Remove magic_n, and consolidate flags into bitfields. For punch_hole we introduce a third value that indicates a thinly provisioned storage device.

fil_node_t::find_metadata(): Detect all attributes of the file.
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
|
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
|
||||||
Copyright (c) 2010, 2020, MariaDB Corporation.
|
Copyright (c) 2010, 2021, MariaDB Corporation.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@@ -183,10 +183,11 @@ extern BOOL my_obtain_privilege(LPCSTR lpPrivilege);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
void my_init_atomic_write(void);
|
void my_init_atomic_write(void);
|
||||||
|
#define my_test_if_thinly_provisioned(A) 0
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
my_bool my_test_if_atomic_write(File handle, int pagesize);
|
my_bool my_test_if_atomic_write(File handle, int pagesize);
|
||||||
#else
|
#else
|
||||||
#define my_test_if_atomic_write(A, B) 0
|
# define my_test_if_atomic_write(A, B) 0
|
||||||
#endif /* __linux__ */
|
#endif /* __linux__ */
|
||||||
extern my_bool my_may_have_atomic_write;
|
extern my_bool my_may_have_atomic_write;
|
||||||
|
|
||||||
|
@@ -1754,7 +1754,7 @@ SESSION_VALUE NULL
|
|||||||
DEFAULT_VALUE ON
|
DEFAULT_VALUE ON
|
||||||
VARIABLE_SCOPE GLOBAL
|
VARIABLE_SCOPE GLOBAL
|
||||||
VARIABLE_TYPE BOOLEAN
|
VARIABLE_TYPE BOOLEAN
|
||||||
VARIABLE_COMMENT Enable atomic writes, instead of using the doublewrite buffer, for files on devices that supports atomic writes. This option only works on Linux with either FusionIO cards using the directFS filesystem or with Shannon cards using any file system.
|
VARIABLE_COMMENT Enable atomic writes, instead of using the doublewrite buffer, for files on devices that supports atomic writes.
|
||||||
NUMERIC_MIN_VALUE NULL
|
NUMERIC_MIN_VALUE NULL
|
||||||
NUMERIC_MAX_VALUE NULL
|
NUMERIC_MAX_VALUE NULL
|
||||||
NUMERIC_BLOCK_SIZE NULL
|
NUMERIC_BLOCK_SIZE NULL
|
||||||
|
@@ -712,6 +712,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
|
|||||||
ut_ad(request.bpage);
|
ut_ad(request.bpage);
|
||||||
ut_ad(request.bpage->in_file());
|
ut_ad(request.bpage->in_file());
|
||||||
ut_ad(request.node);
|
ut_ad(request.node);
|
||||||
|
ut_ad(request.node->space->purpose == FIL_TYPE_TABLESPACE);
|
||||||
ut_ad(request.node->space->id == request.bpage->id().space());
|
ut_ad(request.node->space->id == request.bpage->id().space());
|
||||||
ut_ad(request.node->space->referenced());
|
ut_ad(request.node->space->referenced());
|
||||||
ut_ad(!srv_read_only_mode);
|
ut_ad(!srv_read_only_mode);
|
||||||
|
@@ -804,8 +804,6 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
|||||||
ut_ad(bpage->ready_for_flush());
|
ut_ad(bpage->ready_for_flush());
|
||||||
ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
|
ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
|
||||||
(space == fil_system.temp_space));
|
(space == fil_system.temp_space));
|
||||||
ut_ad(space->purpose == FIL_TYPE_TABLESPACE ||
|
|
||||||
space->atomic_write_supported);
|
|
||||||
ut_ad(space->referenced());
|
ut_ad(space->referenced());
|
||||||
ut_ad(lru || space != fil_system.temp_space);
|
ut_ad(lru || space != fil_system.temp_space);
|
||||||
|
|
||||||
@@ -912,8 +910,16 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
|
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
|
||||||
if (size != orig_size && space->punch_hole)
|
if (size != orig_size)
|
||||||
type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;
|
{
|
||||||
|
switch (space->chain.start->punch_hole) {
|
||||||
|
case 1:
|
||||||
|
type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
size= orig_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
frame=page;
|
frame=page;
|
||||||
}
|
}
|
||||||
@@ -1036,8 +1042,8 @@ innodb_immediate_scrub_data_uncompressed from the freed ranges.
|
|||||||
@param space tablespace which may contain ranges of freed pages */
|
@param space tablespace which may contain ranges of freed pages */
|
||||||
static void buf_flush_freed_pages(fil_space_t *space)
|
static void buf_flush_freed_pages(fil_space_t *space)
|
||||||
{
|
{
|
||||||
const bool punch_hole= space->punch_hole;
|
const bool punch_hole= space->chain.start->punch_hole == 1;
|
||||||
if (!srv_immediate_scrub_data_uncompressed && !punch_hole)
|
if (!punch_hole && !srv_immediate_scrub_data_uncompressed)
|
||||||
return;
|
return;
|
||||||
lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn();
|
lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn();
|
||||||
|
|
||||||
@@ -1064,7 +1070,7 @@ static void buf_flush_freed_pages(fil_space_t *space)
|
|||||||
(range.last - range.first + 1) * physical_size,
|
(range.last - range.first + 1) * physical_size,
|
||||||
nullptr);
|
nullptr);
|
||||||
}
|
}
|
||||||
else if (srv_immediate_scrub_data_uncompressed)
|
else
|
||||||
{
|
{
|
||||||
for (os_offset_t i= range.first; i <= range.last; i++)
|
for (os_offset_t i= range.first; i <= range.last; i++)
|
||||||
{
|
{
|
||||||
|
@@ -317,8 +317,6 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
|
|||||||
|
|
||||||
node->size = size;
|
node->size = size;
|
||||||
|
|
||||||
node->magic_n = FIL_NODE_MAGIC_N;
|
|
||||||
|
|
||||||
node->init_size = size;
|
node->init_size = size;
|
||||||
node->max_size = max_pages;
|
node->max_size = max_pages;
|
||||||
|
|
||||||
@@ -718,7 +716,6 @@ bool fil_space_extend(fil_space_t *space, uint32_t size)
|
|||||||
inline pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
|
inline pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
|
||||||
{
|
{
|
||||||
mysql_mutex_assert_owner(&fil_system.mutex);
|
mysql_mutex_assert_owner(&fil_system.mutex);
|
||||||
ut_a(magic_n == FIL_NODE_MAGIC_N);
|
|
||||||
ut_a(!being_extended);
|
ut_a(!being_extended);
|
||||||
|
|
||||||
if (is_open() &&
|
if (is_open() &&
|
||||||
@@ -941,16 +938,6 @@ fil_space_t *fil_space_t::create(ulint id, ulint flags,
|
|||||||
|
|
||||||
space->latch.SRW_LOCK_INIT(fil_space_latch_key);
|
space->latch.SRW_LOCK_INIT(fil_space_latch_key);
|
||||||
|
|
||||||
if (space->purpose == FIL_TYPE_TEMPORARY) {
|
|
||||||
/* SysTablespace::open_or_create() would pass
|
|
||||||
size!=0 to fil_space_t::add(), so first_time_open
|
|
||||||
would not hold in fil_node_open_file(), and we
|
|
||||||
must assign this manually. We do not care about
|
|
||||||
the durability or atomicity of writes to the
|
|
||||||
temporary tablespace files. */
|
|
||||||
space->atomic_write_supported = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
mysql_mutex_lock(&fil_system.mutex);
|
mysql_mutex_lock(&fil_system.mutex);
|
||||||
|
|
||||||
if (const fil_space_t *old_space = fil_space_get_by_id(id)) {
|
if (const fil_space_t *old_space = fil_space_get_by_id(id)) {
|
||||||
@@ -1951,9 +1938,6 @@ skip_second_rename:
|
|||||||
return(success);
|
return(success);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIXME: remove this! */
|
|
||||||
IF_WIN(, bool os_is_sparse_file_supported(os_file_t fh));
|
|
||||||
|
|
||||||
/** Create a tablespace file.
|
/** Create a tablespace file.
|
||||||
@param[in] space_id Tablespace ID
|
@param[in] space_id Tablespace ID
|
||||||
@param[in] name Tablespace name in dbname/tablename format.
|
@param[in] name Tablespace name in dbname/tablename format.
|
||||||
@@ -2041,7 +2025,6 @@ fil_ibd_create(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const bool is_compressed = fil_space_t::is_compressed(flags);
|
const bool is_compressed = fil_space_t::is_compressed(flags);
|
||||||
bool punch_hole = is_compressed;
|
|
||||||
fil_space_crypt_t* crypt_data = nullptr;
|
fil_space_crypt_t* crypt_data = nullptr;
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
if (is_compressed) {
|
if (is_compressed) {
|
||||||
@@ -2060,9 +2043,6 @@ err_exit:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIXME: remove this */
|
|
||||||
IF_WIN(, punch_hole = punch_hole && os_is_sparse_file_supported(file));
|
|
||||||
|
|
||||||
/* We have to write the space id to the file immediately and flush the
|
/* We have to write the space id to the file immediately and flush the
|
||||||
file to disk. This is because in crash recovery we must be aware what
|
file to disk. This is because in crash recovery we must be aware what
|
||||||
tablespaces exist and what are their space id's, so that we can apply
|
tablespaces exist and what are their space id's, so that we can apply
|
||||||
@@ -2115,9 +2095,8 @@ err_exit:
|
|||||||
if (fil_space_t* space = fil_space_t::create(space_id, flags,
|
if (fil_space_t* space = fil_space_t::create(space_id, flags,
|
||||||
FIL_TYPE_TABLESPACE,
|
FIL_TYPE_TABLESPACE,
|
||||||
crypt_data, mode)) {
|
crypt_data, mode)) {
|
||||||
space->punch_hole = punch_hole;
|
|
||||||
fil_node_t* node = space->add(path, file, size, false, true);
|
fil_node_t* node = space->add(path, file, size, false, true);
|
||||||
node->find_metadata(file);
|
IF_WIN(node->find_metadata(), node->find_metadata(file, true));
|
||||||
mtr.start();
|
mtr.start();
|
||||||
mtr.set_named_space(space);
|
mtr.set_named_space(space);
|
||||||
fsp_header_init(space, size, &mtr);
|
fsp_header_init(space, size, &mtr);
|
||||||
@@ -2878,7 +2857,7 @@ fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len,
|
|||||||
/* Punch hole is not supported, make space not to
|
/* Punch hole is not supported, make space not to
|
||||||
support punch hole */
|
support punch hole */
|
||||||
if (UNIV_UNLIKELY(err == DB_IO_NO_PUNCH_HOLE)) {
|
if (UNIV_UNLIKELY(err == DB_IO_NO_PUNCH_HOLE)) {
|
||||||
punch_hole = false;
|
node->punch_hole = false;
|
||||||
err = DB_SUCCESS;
|
err = DB_SUCCESS;
|
||||||
}
|
}
|
||||||
goto release_sync_write;
|
goto release_sync_write;
|
||||||
|
@@ -18510,9 +18510,7 @@ static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
|
|||||||
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
|
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
|
||||||
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
|
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
|
||||||
"Enable atomic writes, instead of using the doublewrite buffer, for files "
|
"Enable atomic writes, instead of using the doublewrite buffer, for files "
|
||||||
"on devices that supports atomic writes. "
|
"on devices that supports atomic writes.",
|
||||||
"This option only works on Linux with either FusionIO cards using "
|
|
||||||
"the directFS filesystem or with Shannon cards using any file system.",
|
|
||||||
NULL, NULL, TRUE);
|
NULL, NULL, TRUE);
|
||||||
|
|
||||||
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
|
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
|
||||||
|
@@ -424,13 +424,6 @@ public:
|
|||||||
/** Checks that this tablespace needs key rotation. */
|
/** Checks that this tablespace needs key rotation. */
|
||||||
bool is_in_default_encrypt;
|
bool is_in_default_encrypt;
|
||||||
|
|
||||||
/** True if the device this filespace is on supports atomic writes */
|
|
||||||
bool atomic_write_supported;
|
|
||||||
|
|
||||||
/** True if file system storing this tablespace supports
|
|
||||||
punch hole */
|
|
||||||
bool punch_hole;
|
|
||||||
|
|
||||||
/** mutex to protect freed ranges */
|
/** mutex to protect freed ranges */
|
||||||
std::mutex freed_range_mutex;
|
std::mutex freed_range_mutex;
|
||||||
|
|
||||||
@@ -444,11 +437,7 @@ public:
|
|||||||
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
|
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
|
||||||
|
|
||||||
/** @return whether doublewrite buffering is needed */
|
/** @return whether doublewrite buffering is needed */
|
||||||
bool use_doublewrite() const
|
inline bool use_doublewrite() const;
|
||||||
{
|
|
||||||
return !atomic_write_supported && srv_use_doublewrite_buf &&
|
|
||||||
buf_dblwr.is_initialised();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Append a file to the chain of files of a space.
|
/** Append a file to the chain of files of a space.
|
||||||
@param[in] name file name of a file that is not open
|
@param[in] name file name of a file that is not open
|
||||||
@@ -509,6 +498,8 @@ public:
|
|||||||
/** @return whether the storage device is rotational (HDD, not SSD) */
|
/** @return whether the storage device is rotational (HDD, not SSD) */
|
||||||
inline bool is_rotational() const;
|
inline bool is_rotational() const;
|
||||||
|
|
||||||
|
/** whether the tablespace discovery is being deferred during crash
|
||||||
|
recovery due to incompletely written page 0 */
|
||||||
inline bool is_deferred() const;
|
inline bool is_deferred() const;
|
||||||
|
|
||||||
/** Open each file. Never invoked on .ibd files.
|
/** Open each file. Never invoked on .ibd files.
|
||||||
@@ -1066,60 +1057,56 @@ private:
|
|||||||
/** File node of a tablespace or the log data space */
|
/** File node of a tablespace or the log data space */
|
||||||
struct fil_node_t final
|
struct fil_node_t final
|
||||||
{
|
{
|
||||||
/** tablespace containing this file */
|
/** tablespace containing this file */
|
||||||
fil_space_t* space;
|
fil_space_t *space;
|
||||||
/** file name; protected by fil_system.mutex and log_sys.mutex. */
|
/** file name; protected by fil_system.mutex and log_sys.mutex */
|
||||||
char* name;
|
char *name;
|
||||||
/** file handle (valid if is_open) */
|
/** file handle */
|
||||||
pfs_os_file_t handle;
|
pfs_os_file_t handle;
|
||||||
/** whether the file actually is a raw device or disk partition */
|
/** whether the file is on non-rotational media (SSD) */
|
||||||
bool is_raw_disk;
|
unsigned on_ssd:1;
|
||||||
/** whether the file is on non-rotational media (SSD) */
|
/** how to write page_compressed tables
|
||||||
bool on_ssd;
|
(0=do not punch holes but write minimal amount of data, 1=punch holes,
|
||||||
/** size of the file in database pages (0 if not known yet);
|
2=always write the same amount; thinly provisioned storage will compress) */
|
||||||
the possible last incomplete megabyte may be ignored
|
unsigned punch_hole:2;
|
||||||
if space->id == 0 */
|
/** whether this file could use atomic write */
|
||||||
uint32_t size;
|
unsigned atomic_write:1;
|
||||||
/** initial size of the file in database pages;
|
/** whether the file actually is a raw device or disk partition */
|
||||||
FIL_IBD_FILE_INITIAL_SIZE by default */
|
unsigned is_raw_disk:1;
|
||||||
uint32_t init_size;
|
/** whether the tablespace discovery is being deferred during crash
|
||||||
/** maximum size of the file in database pages (0 if unlimited) */
|
recovery due to incompletely written page 0 */
|
||||||
uint32_t max_size;
|
unsigned deferred:1;
|
||||||
/** whether the file is currently being extended */
|
|
||||||
Atomic_relaxed<bool> being_extended;
|
|
||||||
/** link to other files in this tablespace */
|
|
||||||
UT_LIST_NODE_T(fil_node_t) chain;
|
|
||||||
|
|
||||||
/** whether this file could use atomic write (data file) */
|
/** size of the file in database pages (0 if not known yet);
|
||||||
bool atomic_write;
|
the possible last incomplete megabyte may be ignored if space->id == 0 */
|
||||||
|
uint32_t size;
|
||||||
|
/** initial size of the file in database pages;
|
||||||
|
FIL_IBD_FILE_INITIAL_SIZE by default */
|
||||||
|
uint32_t init_size;
|
||||||
|
/** maximum size of the file in database pages (0 if unlimited) */
|
||||||
|
uint32_t max_size;
|
||||||
|
/** whether the file is currently being extended */
|
||||||
|
Atomic_relaxed<bool> being_extended;
|
||||||
|
/** link to other files in this tablespace */
|
||||||
|
UT_LIST_NODE_T(fil_node_t) chain;
|
||||||
|
|
||||||
/** Filesystem block size */
|
/** Filesystem block size */
|
||||||
ulint block_size;
|
ulint block_size;
|
||||||
|
|
||||||
/** Deferring the tablespace during recovery and it
|
/** @return whether this file is open */
|
||||||
can be used to skip the validation of page0 */
|
bool is_open() const { return handle != OS_FILE_CLOSED; }
|
||||||
bool deferred=false;
|
|
||||||
|
|
||||||
/** FIL_NODE_MAGIC_N */
|
/** Read the first page of a data file.
|
||||||
ulint magic_n;
|
@return whether the page was found valid */
|
||||||
|
bool read_page0();
|
||||||
|
|
||||||
/** @return whether this file is open */
|
/** Determine some file metadata when creating or reading the file.
|
||||||
bool is_open() const
|
@param file the file that is being created, or OS_FILE_CLOSED */
|
||||||
{
|
void find_metadata(os_file_t file= OS_FILE_CLOSED
|
||||||
return(handle != OS_FILE_CLOSED);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Read the first page of a data file.
|
|
||||||
@return whether the page was found valid */
|
|
||||||
bool read_page0();
|
|
||||||
|
|
||||||
/** Determine some file metadata when creating or reading the file.
|
|
||||||
@param file the file that is being created, or OS_FILE_CLOSED */
|
|
||||||
void find_metadata(os_file_t file = OS_FILE_CLOSED
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
, struct stat* statbuf = NULL
|
, bool create= false, struct stat *statbuf= nullptr
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
/** Close the file handle. */
|
/** Close the file handle. */
|
||||||
void close();
|
void close();
|
||||||
@@ -1138,8 +1125,11 @@ private:
|
|||||||
void prepare_to_close_or_detach();
|
void prepare_to_close_or_detach();
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Value of fil_node_t::magic_n */
|
inline bool fil_space_t::use_doublewrite() const
|
||||||
#define FIL_NODE_MAGIC_N 89389
|
{
|
||||||
|
return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf &&
|
||||||
|
buf_dblwr.is_initialised();
|
||||||
|
}
|
||||||
|
|
||||||
inline void fil_space_t::set_imported()
|
inline void fil_space_t::set_imported()
|
||||||
{
|
{
|
||||||
|
@@ -3233,7 +3233,7 @@ os_file_set_nocache(
|
|||||||
/** Check if the file system supports sparse files.
|
/** Check if the file system supports sparse files.
|
||||||
@param fh file handle
|
@param fh file handle
|
||||||
@return true if the file system supports sparse files */
|
@return true if the file system supports sparse files */
|
||||||
IF_WIN(static,) bool os_is_sparse_file_supported(os_file_t fh)
|
static bool os_is_sparse_file_supported(os_file_t fh)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
FILE_ATTRIBUTE_TAG_INFO info;
|
FILE_ATTRIBUTE_TAG_INFO info;
|
||||||
@@ -3495,24 +3495,23 @@ dberr_t IORequest::punch_hole(os_offset_t off, ulint len) const
|
|||||||
|
|
||||||
/* Check does file system support punching holes for this
|
/* Check does file system support punching holes for this
|
||||||
tablespace. */
|
tablespace. */
|
||||||
if (!node->space->punch_hole) {
|
if (!node->punch_hole) {
|
||||||
return DB_IO_NO_PUNCH_HOLE;
|
return DB_IO_NO_PUNCH_HOLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
dberr_t err = os_file_punch_hole(node->handle, off, trim_len);
|
dberr_t err = os_file_punch_hole(node->handle, off, trim_len);
|
||||||
|
|
||||||
if (err == DB_SUCCESS) {
|
switch (err) {
|
||||||
|
case DB_SUCCESS:
|
||||||
srv_stats.page_compressed_trim_op.inc();
|
srv_stats.page_compressed_trim_op.inc();
|
||||||
} else {
|
return err;
|
||||||
/* If punch hole is not supported,
|
case DB_IO_NO_PUNCH_HOLE:
|
||||||
set space so that it is not used. */
|
node->punch_hole = false;
|
||||||
if (err == DB_IO_NO_PUNCH_HOLE) {
|
err = DB_SUCCESS;
|
||||||
node->space->punch_hole = false;
|
/* fall through */
|
||||||
err = DB_SUCCESS;
|
default:
|
||||||
}
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (err);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This function returns information about the specified file
|
/** This function returns information about the specified file
|
||||||
@@ -4101,81 +4100,56 @@ static bool is_file_on_ssd(char *file_path)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** Determine some file metadata when creating or reading the file.
|
|
||||||
@param file the file that is being created, or OS_FILE_CLOSED */
|
|
||||||
void fil_node_t::find_metadata(os_file_t file
|
void fil_node_t::find_metadata(os_file_t file
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
, struct stat* statbuf
|
, bool create, struct stat *statbuf
|
||||||
#endif
|
#endif
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
if (file == OS_FILE_CLOSED) {
|
if (!is_open())
|
||||||
file = handle;
|
{
|
||||||
ut_ad(is_open());
|
handle= file;
|
||||||
}
|
ut_ad(is_open());
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef _WIN32 /* FIXME: make this unconditional */
|
if (!space->is_compressed())
|
||||||
if (space->punch_hole) {
|
punch_hole= 0;
|
||||||
space->punch_hole = os_is_sparse_file_supported(file);
|
else if (my_test_if_thinly_provisioned(file))
|
||||||
}
|
punch_hole= 2;
|
||||||
#endif
|
else
|
||||||
|
punch_hole= IF_WIN(, !create ||) os_is_sparse_file_supported(file);
|
||||||
|
|
||||||
/*
|
|
||||||
For the temporary tablespace and during the
|
|
||||||
non-redo-logged adjustments in
|
|
||||||
IMPORT TABLESPACE, we do not care about
|
|
||||||
the atomicity of writes.
|
|
||||||
|
|
||||||
Atomic writes is supported if the file can be used
|
|
||||||
with atomic_writes (not log file), O_DIRECT is
|
|
||||||
used (tested in ha_innodb.cc) and the file is
|
|
||||||
device and file system that supports atomic writes
|
|
||||||
for the given block size.
|
|
||||||
*/
|
|
||||||
space->atomic_write_supported = space->purpose == FIL_TYPE_TEMPORARY
|
|
||||||
|| space->purpose == FIL_TYPE_IMPORT;
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
on_ssd = is_file_on_ssd(name);
|
on_ssd= is_file_on_ssd(name);
|
||||||
FILE_STORAGE_INFO info;
|
FILE_STORAGE_INFO info;
|
||||||
if (GetFileInformationByHandleEx(
|
if (GetFileInformationByHandleEx(file, FileStorageInfo, &info, sizeof info))
|
||||||
file, FileStorageInfo, &info, sizeof(info))) {
|
block_size= info.PhysicalBytesPerSectorForAtomicity;
|
||||||
block_size = info.PhysicalBytesPerSectorForAtomicity;
|
else
|
||||||
} else {
|
block_size= 512;
|
||||||
block_size = 512;
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
struct stat sbuf;
|
struct stat sbuf;
|
||||||
if (!statbuf && !fstat(file, &sbuf)) {
|
if (!statbuf && !fstat(file, &sbuf))
|
||||||
statbuf = &sbuf;
|
statbuf= &sbuf;
|
||||||
}
|
if (statbuf)
|
||||||
if (statbuf) {
|
block_size= statbuf->st_blksize;
|
||||||
block_size = statbuf->st_blksize;
|
|
||||||
}
|
|
||||||
on_ssd = space->atomic_write_supported
|
|
||||||
# ifdef UNIV_LINUX
|
# ifdef UNIV_LINUX
|
||||||
|| (statbuf && fil_system.is_ssd(statbuf->st_dev))
|
on_ssd= statbuf && fil_system.is_ssd(statbuf->st_dev);
|
||||||
# endif
|
# endif
|
||||||
;
|
|
||||||
#endif
|
#endif
|
||||||
if (!space->atomic_write_supported) {
|
|
||||||
space->atomic_write_supported = atomic_write
|
if (space->purpose != FIL_TYPE_TABLESPACE)
|
||||||
&& srv_use_atomic_writes
|
{
|
||||||
#ifndef _WIN32
|
/* For temporary tablespace or during IMPORT TABLESPACE, we
|
||||||
&& my_test_if_atomic_write(file,
|
disable neighbour flushing and do not care about atomicity. */
|
||||||
space->physical_size())
|
on_ssd= true;
|
||||||
#else
|
atomic_write= true;
|
||||||
/* On Windows, all single sector writes are atomic,
|
}
|
||||||
as per WriteFile() documentation on MSDN.
|
else
|
||||||
We also require SSD for atomic writes, eventhough
|
/* On Windows, all single sector writes are atomic, as per
|
||||||
technically it is not necessary- the reason is that
|
WriteFile() documentation on MSDN. */
|
||||||
on hard disks, we still want the benefit from
|
atomic_write= srv_use_atomic_writes &&
|
||||||
(non-atomic) neighbor page flushing in the buffer
|
IF_WIN(srv_page_size == block_size,
|
||||||
pool code. */
|
my_test_if_atomic_write(file, space->physical_size()));
|
||||||
&& srv_page_size == block_size
|
|
||||||
&& on_ssd
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Read the first page of a data file.
|
/** Read the first page of a data file.
|
||||||
@@ -4270,20 +4244,16 @@ invalid:
|
|||||||
space->free_len= free_len;
|
space->free_len= free_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef UNIV_LINUX
|
IF_WIN(find_metadata(), find_metadata(handle, false, &statbuf));
|
||||||
find_metadata(handle, &statbuf);
|
|
||||||
#else
|
|
||||||
find_metadata();
|
|
||||||
#endif
|
|
||||||
/* Truncate the size to a multiple of extent size. */
|
/* Truncate the size to a multiple of extent size. */
|
||||||
ulint mask= psize * FSP_EXTENT_SIZE - 1;
|
ulint mask= psize * FSP_EXTENT_SIZE - 1;
|
||||||
|
|
||||||
if (size_bytes <= mask);
|
if (size_bytes <= mask);
|
||||||
/* .ibd files start smaller than an
|
/* .ibd files start smaller than an
|
||||||
extent size. Do not truncate valid data. */
|
extent size. Do not truncate valid data. */
|
||||||
else size_bytes &= ~os_offset_t(mask);
|
else
|
||||||
|
size_bytes&= ~os_offset_t(mask);
|
||||||
|
|
||||||
space->punch_hole= space->is_compressed();
|
|
||||||
this->size= uint32_t(size_bytes / psize);
|
this->size= uint32_t(size_bytes / psize);
|
||||||
space->set_sizes(this->size);
|
space->set_sizes(this->size);
|
||||||
return true;
|
return true;
|
||||||
|
@@ -3436,7 +3436,7 @@ fil_iterate(
|
|||||||
required by buf_zip_decompress() */
|
required by buf_zip_decompress() */
|
||||||
dberr_t err = DB_SUCCESS;
|
dberr_t err = DB_SUCCESS;
|
||||||
bool page_compressed = false;
|
bool page_compressed = false;
|
||||||
bool punch_hole = true;
|
bool punch_hole = !my_test_if_thinly_provisioned(iter.file);
|
||||||
|
|
||||||
for (offset = iter.start; offset < iter.end; offset += n_bytes) {
|
for (offset = iter.start; offset < iter.end; offset += n_bytes) {
|
||||||
if (callback.is_interrupted()) {
|
if (callback.is_interrupted()) {
|
||||||
|
Reference in New Issue
Block a user