mirror of
https://github.com/MariaDB/server.git
synced 2025-12-24 11:21:21 +03:00
MDEV-8901: InnoDB: Punch hole is incorrectly also done on log files, causing assertion and database corruption
Analysis: The problem is that punch hole does not know the actual page size of the page, nor whether the page belongs to a data file or to a log file. Fix: Pass the file type and page size down to the OS layer to be used when trim is called. Also fix an unsafe null pointer access to the actual write_size.
This commit is contained in:
@@ -219,6 +219,7 @@
|
||||
#cmakedefine HAVE_POSIX_FALLOCATE 1
|
||||
#cmakedefine HAVE_LINUX_FALLOC_H 1
|
||||
#cmakedefine HAVE_FALLOCATE 1
|
||||
#cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1
|
||||
#cmakedefine HAVE_PREAD 1
|
||||
#cmakedefine HAVE_PAUSE_INSTRUCTION 1
|
||||
#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1
|
||||
|
||||
@@ -1058,3 +1058,22 @@ CHECK_STRUCT_HAS_MEMBER("struct dirent" d_ino "dirent.h" STRUCT_DIRENT_HAS_D_IN
|
||||
CHECK_STRUCT_HAS_MEMBER("struct dirent" d_namlen "dirent.h" STRUCT_DIRENT_HAS_D_NAMLEN)
|
||||
SET(SPRINTF_RETURNS_INT 1)
|
||||
CHECK_INCLUDE_FILE(ucontext.h HAVE_UCONTEXT_H)
|
||||
|
||||
IF(NOT MSVC)
|
||||
CHECK_C_SOURCE_RUNS(
|
||||
"
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <linux/falloc.h>
|
||||
int main()
|
||||
{
|
||||
/* Ignore the return value for now. Check if the flags exist.
|
||||
The return value is checked at runtime. */
|
||||
fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
|
||||
|
||||
return(0);
|
||||
}"
|
||||
HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
|
||||
@@ -5223,9 +5223,9 @@ retry:
|
||||
success = os_file_write(node->name, node->handle, buf,
|
||||
offset, page_size * n_pages);
|
||||
#else
|
||||
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
|
||||
success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
|
||||
node->name, node->handle, buf,
|
||||
offset, page_size * n_pages,
|
||||
offset, page_size * n_pages, page_size,
|
||||
node, NULL, 0);
|
||||
#endif /* UNIV_HOTBACKUP */
|
||||
|
||||
@@ -5872,12 +5872,14 @@ fil_io(
|
||||
/* Queue the aio request */
|
||||
ret = os_aio(
|
||||
type,
|
||||
is_log,
|
||||
mode | wake_later,
|
||||
node->name,
|
||||
node->handle,
|
||||
buf,
|
||||
offset,
|
||||
len,
|
||||
zip_size ? zip_size : UNIV_PAGE_SIZE,
|
||||
node,
|
||||
message,
|
||||
write_size);
|
||||
|
||||
@@ -311,10 +311,10 @@ The wrapper functions have the prefix of "innodb_". */
|
||||
# define os_file_close(file) \
|
||||
pfs_os_file_close_func(file, __FILE__, __LINE__)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, write_size) \
|
||||
pfs_os_aio_func(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, write_size, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, write_size) \
|
||||
pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, write_size, \
|
||||
__FILE__, __LINE__)
|
||||
|
||||
|
||||
@@ -357,10 +357,10 @@ to original un-instrumented file I/O APIs */
|
||||
|
||||
# define os_file_close(file) os_file_close_func(file)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, n, message1, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
|
||||
message2, write_size) \
|
||||
os_aio_func(type, mode, name, file, buf, offset, n, \
|
||||
message1, message2, write_size)
|
||||
os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
|
||||
page_size, message1, message2, write_size)
|
||||
|
||||
# define os_file_read(file, buf, offset, n) \
|
||||
os_file_read_func(file, buf, offset, n)
|
||||
@@ -749,6 +749,7 @@ ibool
|
||||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
@@ -757,6 +758,7 @@ pfs_os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
@@ -1107,6 +1109,7 @@ ibool
|
||||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
@@ -1127,6 +1130,7 @@ os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
||||
@@ -199,6 +199,7 @@ ibool
|
||||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
@@ -207,6 +208,7 @@ pfs_os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
@@ -234,8 +236,8 @@ pfs_os_aio_func(
|
||||
: PSI_FILE_READ,
|
||||
src_file, src_line);
|
||||
|
||||
result = os_aio_func(type, mode, name, file, buf, offset,
|
||||
n, message1, message2, write_size);
|
||||
result = os_aio_func(type, is_log, mode, name, file, buf, offset,
|
||||
n, page_size, message1, message2, write_size);
|
||||
|
||||
register_pfs_file_io_end(locker, n);
|
||||
|
||||
|
||||
@@ -49,9 +49,8 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#include "buf0buf.h"
|
||||
#include "srv0mon.h"
|
||||
#include "srv0srv.h"
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
#ifdef HAVE_LINUX_UNISTD_H
|
||||
#include "unistd.h"
|
||||
#include "fcntl.h"
|
||||
#endif
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
# include "os0sync.h"
|
||||
@@ -84,14 +83,10 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#include <linux/falloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#ifndef FALLOC_FL_KEEP_SIZE
|
||||
#define FALLOC_FL_KEEP_SIZE 0x01
|
||||
#endif
|
||||
#ifndef FALLOC_FL_PUNCH_HOLE
|
||||
#define FALLOC_FL_PUNCH_HOLE 0x02
|
||||
#endif
|
||||
#endif
|
||||
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
# include <fcntl.h>
|
||||
# include <linux/falloc.h>
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
|
||||
|
||||
#ifdef HAVE_LZO
|
||||
#include "lzo/lzo1x.h"
|
||||
@@ -209,6 +204,9 @@ struct os_aio_slot_t{
|
||||
write */
|
||||
byte* buf; /*!< buffer used in i/o */
|
||||
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */
|
||||
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
|
||||
|
||||
os_offset_t offset; /*!< file offset in bytes */
|
||||
os_file_t file; /*!< file where to read or write */
|
||||
const char* name; /*!< file name or path */
|
||||
@@ -4474,6 +4472,7 @@ os_aio_slot_t*
|
||||
os_aio_array_reserve_slot(
|
||||
/*======================*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
os_aio_array_t* array, /*!< in: aio array */
|
||||
fil_node_t* message1,/*!< in: message to be passed along with
|
||||
the aio operation */
|
||||
@@ -4486,6 +4485,7 @@ os_aio_array_reserve_slot(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset */
|
||||
ulint len, /*!< in: length of the block to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
ulint* write_size)/*!< in/out: Actual write size initialized
|
||||
after fist successfull trim
|
||||
operation for this page and if
|
||||
@@ -4580,6 +4580,8 @@ found:
|
||||
slot->offset = offset;
|
||||
slot->io_already_done = FALSE;
|
||||
slot->write_size = write_size;
|
||||
slot->is_log = is_log;
|
||||
slot->page_size = page_size;
|
||||
|
||||
if (message1) {
|
||||
slot->file_block_size = fil_node_get_block_size(message1);
|
||||
@@ -4836,6 +4838,7 @@ ibool
|
||||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
@@ -4856,6 +4859,7 @@ os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
@@ -4982,8 +4986,8 @@ try_again:
|
||||
array = NULL; /* Eliminate compiler warning */
|
||||
}
|
||||
|
||||
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
|
||||
name, buf, offset, n, write_size);
|
||||
slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
|
||||
name, buf, offset, n, page_size, write_size);
|
||||
|
||||
if (type == OS_FILE_READ) {
|
||||
if (srv_use_native_aio) {
|
||||
@@ -5251,7 +5255,10 @@ os_aio_windows_handle(
|
||||
ret_val = ret && len == slot->len;
|
||||
}
|
||||
|
||||
if (slot->type == OS_FILE_WRITE && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (slot->type == OS_FILE_WRITE &&
|
||||
!slot->is_log &&
|
||||
srv_use_trim &&
|
||||
os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
@@ -5345,7 +5352,10 @@ retry:
|
||||
/* We have not overstepped to next segment. */
|
||||
ut_a(slot->pos < end_pos);
|
||||
|
||||
if (slot->type == OS_FILE_WRITE && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (slot->type == OS_FILE_WRITE &&
|
||||
!slot->is_log &&
|
||||
srv_use_trim &&
|
||||
os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
@@ -6220,19 +6230,13 @@ os_file_trim(
|
||||
{
|
||||
|
||||
size_t len = slot->len;
|
||||
size_t trim_len = UNIV_PAGE_SIZE - len;
|
||||
size_t trim_len = slot->page_size - len;
|
||||
os_offset_t off = slot->offset + len;
|
||||
size_t bsize = slot->file_block_size;
|
||||
|
||||
// len here should be alligned to sector size
|
||||
ut_ad((trim_len % bsize) == 0);
|
||||
ut_ad((len % bsize) == 0);
|
||||
ut_ad(bsize != 0);
|
||||
ut_ad((off % bsize) == 0);
|
||||
|
||||
#ifdef UNIV_TRIM_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
|
||||
*slot->write_size, trim_len, len, off, bsize);
|
||||
slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
|
||||
#endif
|
||||
|
||||
// Nothing to do if trim length is zero or if actual write
|
||||
@@ -6247,22 +6251,19 @@ os_file_trim(
|
||||
*slot->write_size > 0 &&
|
||||
len >= *slot->write_size)) {
|
||||
|
||||
#ifdef UNIV_PAGECOMPRESS_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n",
|
||||
*slot->write_size, trim_len, len);
|
||||
#endif
|
||||
if (slot->write_size) {
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
}
|
||||
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
*slot->write_size = len;
|
||||
}
|
||||
|
||||
*slot->write_size = len;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
|
||||
int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
|
||||
|
||||
if (ret) {
|
||||
@@ -6300,7 +6301,7 @@ os_file_trim(
|
||||
*slot->write_size = 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_FALLOCATE ... */
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
|
||||
|
||||
#elif defined(_WIN32)
|
||||
FILE_LEVEL_TRIM flt;
|
||||
|
||||
@@ -5253,9 +5253,9 @@ retry:
|
||||
success = os_file_write(node->name, node->handle, buf,
|
||||
offset, page_size * n_pages);
|
||||
#else
|
||||
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
|
||||
success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
|
||||
node->name, node->handle, buf,
|
||||
offset, page_size * n_pages,
|
||||
offset, page_size * n_pages, page_size,
|
||||
node, NULL, space_id, NULL, 0);
|
||||
#endif /* UNIV_HOTBACKUP */
|
||||
|
||||
@@ -5918,12 +5918,14 @@ _fil_io(
|
||||
/* Queue the aio request */
|
||||
ret = os_aio(
|
||||
type,
|
||||
is_log,
|
||||
mode | wake_later,
|
||||
node->name,
|
||||
node->handle,
|
||||
buf,
|
||||
offset,
|
||||
len,
|
||||
zip_size ? zip_size : UNIV_PAGE_SIZE,
|
||||
node,
|
||||
message,
|
||||
space_id,
|
||||
|
||||
@@ -321,11 +321,11 @@ The wrapper functions have the prefix of "innodb_". */
|
||||
# define os_file_close(file) \
|
||||
pfs_os_file_close_func(file, __FILE__, __LINE__)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, space_id, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, space_id, \
|
||||
trx, write_size) \
|
||||
pfs_os_aio_func(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, space_id, trx, write_size, \
|
||||
pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, space_id, trx, write_size, \
|
||||
__FILE__, __LINE__)
|
||||
|
||||
# define os_file_read(file, buf, offset, n) \
|
||||
@@ -372,10 +372,10 @@ to original un-instrumented file I/O APIs */
|
||||
|
||||
# define os_file_close(file) os_file_close_func(file)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, n, message1, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
|
||||
message2, space_id, trx, write_size) \
|
||||
os_aio_func(type, mode, name, file, buf, offset, n, \
|
||||
message1, message2, space_id, trx, write_size)
|
||||
os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
|
||||
page_size, message1, message2, space_id, trx, write_size)
|
||||
|
||||
# define os_file_read(file, buf, offset, n) \
|
||||
os_file_read_func(file, buf, offset, n, NULL)
|
||||
@@ -772,6 +772,7 @@ ibool
|
||||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
@@ -780,6 +781,7 @@ pfs_os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size,/*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
@@ -1139,6 +1141,7 @@ ibool
|
||||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
@@ -1159,6 +1162,7 @@ os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
||||
@@ -202,6 +202,7 @@ ibool
|
||||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
@@ -210,6 +211,7 @@ pfs_os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
@@ -239,8 +241,8 @@ pfs_os_aio_func(
|
||||
: PSI_FILE_READ,
|
||||
src_file, src_line);
|
||||
|
||||
result = os_aio_func(type, mode, name, file, buf, offset,
|
||||
n, message1, message2, space_id, trx,
|
||||
result = os_aio_func(type, is_log, mode, name, file, buf, offset,
|
||||
n, page_size, message1, message2, space_id, trx,
|
||||
write_size);
|
||||
|
||||
register_pfs_file_io_end(locker, n);
|
||||
|
||||
@@ -50,9 +50,8 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#include "trx0trx.h"
|
||||
#include "srv0mon.h"
|
||||
#include "srv0srv.h"
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
#ifdef HAVE_LINUX_UNISTD_H
|
||||
#include "unistd.h"
|
||||
#include "fcntl.h"
|
||||
#endif
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
# include "os0sync.h"
|
||||
@@ -89,14 +88,10 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#include <linux/falloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#ifndef FALLOC_FL_KEEP_SIZE
|
||||
#define FALLOC_FL_KEEP_SIZE 0x01
|
||||
#endif
|
||||
#ifndef FALLOC_FL_PUNCH_HOLE
|
||||
#define FALLOC_FL_PUNCH_HOLE 0x02
|
||||
#endif
|
||||
#endif
|
||||
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
# include <fcntl.h>
|
||||
# include <linux/falloc.h>
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
|
||||
|
||||
#ifdef HAVE_LZO
|
||||
#include "lzo/lzo1x.h"
|
||||
@@ -221,6 +216,9 @@ struct os_aio_slot_t{
|
||||
write */
|
||||
byte* buf; /*!< buffer used in i/o */
|
||||
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log; /*!< 1 is OS_FILE_LOG or 0 */
|
||||
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
|
||||
|
||||
os_offset_t offset; /*!< file offset in bytes */
|
||||
os_file_t file; /*!< file where to read or write */
|
||||
const char* name; /*!< file name or path */
|
||||
@@ -4573,6 +4571,7 @@ os_aio_slot_t*
|
||||
os_aio_array_reserve_slot(
|
||||
/*======================*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
os_aio_array_t* array, /*!< in: aio array */
|
||||
fil_node_t* message1,/*!< in: message to be passed along with
|
||||
the aio operation */
|
||||
@@ -4585,6 +4584,7 @@ os_aio_array_reserve_slot(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset */
|
||||
ulint len, /*!< in: length of the block to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
ulint space_id,
|
||||
ulint* write_size)/*!< in/out: Actual write size initialized
|
||||
after first successfull trim
|
||||
@@ -4681,6 +4681,8 @@ found:
|
||||
slot->offset = offset;
|
||||
slot->io_already_done = FALSE;
|
||||
slot->space_id = space_id;
|
||||
slot->is_log = is_log;
|
||||
slot->page_size = page_size;
|
||||
|
||||
if (message1) {
|
||||
slot->file_block_size = fil_node_get_block_size(message1);
|
||||
@@ -4934,6 +4936,7 @@ ibool
|
||||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
@@ -4954,6 +4957,7 @@ os_aio_func(
|
||||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
@@ -5072,8 +5076,8 @@ try_again:
|
||||
trx->io_read += n;
|
||||
}
|
||||
|
||||
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
|
||||
name, buf, offset, n, space_id,
|
||||
slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
|
||||
name, buf, offset, n, page_size, space_id,
|
||||
write_size);
|
||||
|
||||
if (type == OS_FILE_READ) {
|
||||
@@ -5294,7 +5298,7 @@ os_aio_windows_handle(
|
||||
}
|
||||
|
||||
if (slot->type == OS_FILE_WRITE) {
|
||||
if (srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
@@ -5390,7 +5394,7 @@ retry:
|
||||
ut_a(slot->pos < end_pos);
|
||||
|
||||
if (slot->type == OS_FILE_WRITE) {
|
||||
if (srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
@@ -6305,19 +6309,13 @@ os_file_trim(
|
||||
os_aio_slot_t* slot) /*!< in: slot structure */
|
||||
{
|
||||
size_t len = slot->len;
|
||||
size_t trim_len = UNIV_PAGE_SIZE - slot->len;
|
||||
size_t trim_len = slot->page_size - slot->len;
|
||||
os_offset_t off __attribute__((unused)) = slot->offset + len;
|
||||
size_t bsize = slot->file_block_size;
|
||||
|
||||
// len here should be alligned to sector size
|
||||
ut_ad((trim_len % bsize) == 0);
|
||||
ut_ad((len % bsize) == 0);
|
||||
ut_ad(bsize != 0);
|
||||
ut_ad((off % bsize) == 0);
|
||||
|
||||
#ifdef UNIV_TRIM_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
|
||||
*slot->write_size, trim_len, len, off, bsize);
|
||||
slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
|
||||
#endif
|
||||
|
||||
// Nothing to do if trim length is zero or if actual write
|
||||
@@ -6332,22 +6330,19 @@ os_file_trim(
|
||||
*slot->write_size > 0 &&
|
||||
len >= *slot->write_size)) {
|
||||
|
||||
#ifdef UNIV_PAGECOMPRESS_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n",
|
||||
*slot->write_size, trim_len, len);
|
||||
#endif
|
||||
if (slot->write_size) {
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
}
|
||||
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
*slot->write_size = len;
|
||||
}
|
||||
|
||||
*slot->write_size = len;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
|
||||
int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
|
||||
|
||||
if (ret) {
|
||||
@@ -6385,7 +6380,7 @@ os_file_trim(
|
||||
*slot->write_size = 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_FALLOCATE ... */
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
|
||||
|
||||
#elif defined(_WIN32)
|
||||
FILE_LEVEL_TRIM flt;
|
||||
|
||||
Reference in New Issue
Block a user