diff --git a/mysql-test/suite/encryption/t/innodb-discard-import-change.opt b/mysql-test/suite/encryption/t/innodb-discard-import-change.opt new file mode 100644 index 00000000000..ebf13f41150 --- /dev/null +++ b/mysql-test/suite/encryption/t/innodb-discard-import-change.opt @@ -0,0 +1 @@ +--loose-innodb-use-trim=0 \ No newline at end of file diff --git a/mysql-test/suite/innodb/include/have_innodb_punchhole.inc b/mysql-test/suite/innodb/include/have_innodb_punchhole.inc new file mode 100644 index 00000000000..74cd5c4e0f2 --- /dev/null +++ b/mysql-test/suite/innodb/include/have_innodb_punchhole.inc @@ -0,0 +1,4 @@ +if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_punch_hole' AND variable_value = 'ON'`) +{ + --skip Test requires InnoDB compiled with fallocate(FALLOC_PUNCH_HOLE| FALLOC_KEEP_SIZE) +} diff --git a/mysql-test/suite/innodb/r/innodb-trim.result b/mysql-test/suite/innodb/r/innodb-trim.result new file mode 100644 index 00000000000..40eac2f8b40 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-trim.result @@ -0,0 +1,20 @@ +set global innodb_compression_algorithm = 1; +create table innodb_page_compressed (c1 int not null primary key auto_increment, b char(200), c char(200), d char(200)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; +Level Code Message +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_page_compressed values (NULL,repeat('A',150),repeat('AB',75),repeat('B', 175)); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(16000); +commit; +set autocommit=1; +DROP PROCEDURE innodb_insert_proc; +DROP TABLE innodb_page_compressed; diff --git a/mysql-test/suite/innodb/r/innodb_monitor.result b/mysql-test/suite/innodb/r/innodb_monitor.result index 0a163193b58..263da6070b2 100644 --- 
a/mysql-test/suite/innodb/r/innodb_monitor.result +++ b/mysql-test/suite/innodb/r/innodb_monitor.result @@ -181,16 +181,8 @@ compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled compress_saved disabled -compress_trim_sect512 disabled -compress_trim_sect1024 disabled -compress_trim_sect2048 disabled -compress_trim_sect4096 disabled -compress_trim_sect8192 disabled -compress_trim_sect16384 disabled -compress_trim_sect32768 disabled compress_pages_page_compressed disabled compress_page_compressed_trim_op disabled -compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled compress_pages_encrypted disabled diff --git a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result index 4875dfaeb2a..f515cb047f1 100644 --- a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result +++ b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result @@ -216,16 +216,8 @@ compress_pages_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL N compression_pad_increments compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is incremented to avoid compression failures compression_pad_decrements compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is decremented due to good compressibility compress_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of bytes saved by page compression -compress_trim_sect512 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-512 TRIMed by page compression -compress_trim_sect1024 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-1024 TRIMed by page compression -compress_trim_sect2048 compression 0 
NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-2048 TRIMed by page compression -compress_trim_sect4096 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-4K TRIMed by page compression -compress_trim_sect8192 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-8K TRIMed by page compression -compress_trim_sect16384 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-16K TRIMed by page compression -compress_trim_sect32768 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-32K TRIMed by page compression compress_pages_page_compressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages compressed by page compression compress_page_compressed_trim_op compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operation performed by page compression -compress_page_compressed_trim_op_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operation saved by page compression compress_pages_page_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decompressed by page compression compress_pages_page_compression_error compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of page compression errors compress_pages_encrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages encrypted diff --git a/mysql-test/suite/innodb/t/innodb-trim.opt b/mysql-test/suite/innodb/t/innodb-trim.opt new file mode 100644 index 00000000000..c33d075b002 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-trim.opt @@ -0,0 +1 @@ +--loose-innodb-use-trim=1 diff --git a/mysql-test/suite/innodb/t/innodb-trim.test 
b/mysql-test/suite/innodb/t/innodb-trim.test new file mode 100644 index 00000000000..1b64321116c --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-trim.test @@ -0,0 +1,44 @@ +--source include/have_innodb.inc +--source include/have_innodb_punchhole.inc + +--disable_query_log +--disable_warnings +let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; +--enable_warnings +--enable_query_log + +# zlib +set global innodb_compression_algorithm = 1; + +create table innodb_page_compressed (c1 int not null primary key auto_increment, b char(200), c char(200), d char(200)) engine=innodb page_compressed=1 page_compression_level=9; +show warnings; + +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_page_compressed values (NULL,repeat('A',150),repeat('AB',75),repeat('B', 175)); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(16000); +commit; +set autocommit=1; + +let $wait_condition= SELECT variable_value > 5 FROM information_schema.global_status WHERE variable_name = 'innodb_num_page_compressed_trim_op'; +--source include/wait_condition.inc + +DROP PROCEDURE innodb_insert_proc; +DROP TABLE innodb_page_compressed; + +--disable_query_log +--disable_warnings +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +--enable_warnings +--enable_query_log diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result index 2a66a0d0931..ccd8e482756 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result @@ -181,16 +181,8 @@ compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled compress_saved 
disabled -compress_trim_sect512 disabled -compress_trim_sect1024 disabled -compress_trim_sect2048 disabled -compress_trim_sect4096 disabled -compress_trim_sect8192 disabled -compress_trim_sect16384 disabled -compress_trim_sect32768 disabled compress_pages_page_compressed disabled compress_page_compressed_trim_op disabled -compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled compress_pages_encrypted disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result index 2a66a0d0931..ccd8e482756 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result @@ -181,16 +181,8 @@ compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled compress_saved disabled -compress_trim_sect512 disabled -compress_trim_sect1024 disabled -compress_trim_sect2048 disabled -compress_trim_sect4096 disabled -compress_trim_sect8192 disabled -compress_trim_sect16384 disabled -compress_trim_sect32768 disabled compress_pages_page_compressed disabled compress_page_compressed_trim_op disabled -compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled compress_pages_encrypted disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result index 2a66a0d0931..ccd8e482756 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result @@ -181,16 +181,8 @@ compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled compress_saved disabled -compress_trim_sect512 disabled -compress_trim_sect1024 disabled 
-compress_trim_sect2048 disabled -compress_trim_sect4096 disabled -compress_trim_sect8192 disabled -compress_trim_sect16384 disabled -compress_trim_sect32768 disabled compress_pages_page_compressed disabled compress_page_compressed_trim_op disabled -compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled compress_pages_encrypted disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result index 2a66a0d0931..ccd8e482756 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result @@ -181,16 +181,8 @@ compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled compress_saved disabled -compress_trim_sect512 disabled -compress_trim_sect1024 disabled -compress_trim_sect2048 disabled -compress_trim_sect4096 disabled -compress_trim_sect8192 disabled -compress_trim_sect16384 disabled -compress_trim_sect32768 disabled compress_pages_page_compressed disabled compress_page_compressed_trim_op disabled -compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled compress_pages_encrypted disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result index 63292f5d3c8..6ab0a19fb57 100644 --- a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result @@ -1,12 +1,14 @@ SET @start_use_trim = @@global.innodb_use_trim; SELECT @start_use_trim; @start_use_trim -0 +1 SELECT COUNT(@@GLOBAL.innodb_use_trim); COUNT(@@GLOBAL.innodb_use_trim) 1 1 Expected SET @@GLOBAL.innodb_use_trim=1; +Warnings: +Warning 131 Using innodb_use_trim is deprecated and the parameter may be removed in future 
releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT COUNT(@@GLOBAL.innodb_use_trim); COUNT(@@GLOBAL.innodb_use_trim) 1 @@ -28,6 +30,8 @@ COUNT(VARIABLE_VALUE) 1 1 Expected SET @@global.innodb_use_trim = @start_use_trim; +Warnings: +Warning 131 Using innodb_use_trim is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@global.innodb_use_trim; @@global.innodb_use_trim -0 +1 diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index b4c7b2cc1fb..5954b057d2f 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -2612,12 +2612,12 @@ READ_ONLY YES COMMAND_LINE_ARGUMENT NONE VARIABLE_NAME INNODB_USE_TRIM SESSION_VALUE NULL -GLOBAL_VALUE OFF +GLOBAL_VALUE ON GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE OFF +DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Use trim. Default FALSE. +VARIABLE_COMMENT Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default) NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 315c951a9fe..0f1d170b172 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -35,6 +35,7 @@ Created 11/5/1995 Heikki Tuuri #include "page0size.h" #include "buf0buf.h" +#include "os0api.h" #ifdef UNIV_NONINL #include "buf0buf.ic" @@ -7659,4 +7660,30 @@ buf_page_decrypt_after_read( return (success); } + +/** +Should we punch hole to deallocate unused portion of the page. 
+@param[in] bpage Page control block +@return true if punch hole should be used, false if not */ +bool +buf_page_should_punch_hole( + const buf_page_t* bpage) +{ + return (bpage->real_size != bpage->size.physical()); +} + +/** +Calculate the length of trim (punch_hole) operation. +@param[in] bpage Page control block +@param[in] write_length Write length +@return length of the trim or zero. */ +ulint +buf_page_get_trim_length( + const buf_page_t* bpage, + ulint write_length) +{ + return (bpage->size.physical() - write_length); +} + + #endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 4deee54d97f..4f83921a553 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -591,7 +591,7 @@ buf_dblwr_process(void) dberr_t err = fil_io( request, true, page_id, page_size, - 0, page_size.physical(), read_buf, NULL, NULL); + 0, page_size.physical(), read_buf, NULL); if (err != DB_SUCCESS) { ib::warn() @@ -679,7 +679,7 @@ buf_dblwr_process(void) fil_io(write_request, true, page_id, page_size, 0, page_size.physical(), - const_cast(page), NULL, NULL); + const_cast(page), NULL); ib::info() << "Recovered page " << page_id << " from the doublewrite buffer."; @@ -912,7 +912,7 @@ buf_dblwr_write_block_to_datafile( type |= IORequest::DO_NOT_WAKE; } - IORequest request(type); + IORequest request(type, const_cast(bpage)); /* We request frame here to get correct buffer in case of encryption and/or page compression */ @@ -924,7 +924,7 @@ buf_dblwr_write_block_to_datafile( fil_io(request, sync, bpage->id, bpage->size, 0, bpage->size.physical(), (void*) frame, - (void*) bpage, NULL); + (void*) bpage); } else { ut_ad(!bpage->size.is_compressed()); @@ -938,8 +938,8 @@ buf_dblwr_write_block_to_datafile( buf_dblwr_check_page_lsn(block->frame); fil_io(request, - sync, bpage->id, bpage->size, 0, bpage->size.physical(), - frame, block, (ulint *)&bpage->write_size); + sync, bpage->id, bpage->size, 0, 
bpage->real_size, + frame, block); } } @@ -1041,7 +1041,7 @@ try_again: fil_io(IORequestWrite, true, page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size, - 0, len, (void*) write_buf, NULL, NULL); + 0, len, (void*) write_buf, NULL); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. */ @@ -1057,7 +1057,7 @@ try_again: fil_io(IORequestWrite, true, page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size, - 0, len, (void*) write_buf, NULL, NULL); + 0, len, (void*) write_buf, NULL); flush: /* increment the doublewrite flushed pages counter */ @@ -1292,7 +1292,6 @@ retry: 0, univ_page_size.physical(), (void *)(buf_dblwr->write_buf + univ_page_size.physical() * i), - NULL, NULL); } else { /* It is a regular page. Write it directly to the @@ -1304,7 +1303,6 @@ retry: 0, univ_page_size.physical(), (void*) frame, - NULL, NULL); } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 2738fbd0ec7..5fdb735e0d3 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1093,11 +1093,11 @@ buf_flush_write_block_low( ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE; - IORequest request(type); + IORequest request(type, bpage); fil_io(request, sync, bpage->id, bpage->size, 0, bpage->size.physical(), - frame, bpage, NULL); + frame, bpage); } else { if (flush_type == BUF_FLUSH_SINGLE_PAGE) { buf_dblwr_write_single_page(bpage, sync); diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index ea3c1ceccf9..4d68ad5ac51 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -182,7 +182,7 @@ buf_read_page_low( *err = fil_io( request, sync, page_id, page_size, 0, page_size.physical(), - dst, bpage, NULL); + dst, bpage); if (sync) { thd_wait_end(NULL); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index a0442808eaa..75067bc075e 100644 --- a/storage/innobase/fil/fil0fil.cc 
+++ b/storage/innobase/fil/fil0fil.cc @@ -58,6 +58,7 @@ Created 10/25/1995 Heikki Tuuri #include "srv0start.h" #include "trx0purge.h" #include "ut0new.h" +#include "os0api.h" /** Tries to close a file in the LRU list. The caller must hold the fil_sys mutex. @@ -280,7 +281,7 @@ fil_read( void* buf) { return(fil_io(IORequestRead, true, page_id, page_size, - byte_offset, len, buf, NULL, NULL)); + byte_offset, len, buf, NULL)); } /** Writes data to a space from a buffer. Remember that the possible incomplete @@ -308,7 +309,7 @@ fil_write( ut_ad(!srv_read_only_mode); return(fil_io(IORequestWrite, true, page_id, page_size, - byte_offset, len, buf, NULL, NULL)); + byte_offset, len, buf, NULL)); } /*******************************************************************//** @@ -524,20 +525,6 @@ fil_node_create_low( node->space = space; - os_file_stat_t stat_info; - -#ifdef UNIV_DEBUG - dberr_t err = -#endif /* UNIV_DEBUG */ - - os_file_get_status( - node->name, &stat_info, false, - fsp_is_system_temporary(space->id) ? true : srv_read_only_mode); - - ut_ad(err == DB_SUCCESS); - - node->block_size = stat_info.block_size; - node->atomic_write = atomic_write; UT_LIST_ADD_LAST(space->chain, node); @@ -1043,7 +1030,7 @@ fil_write_zeros( err = os_aio( request, OS_AIO_SYNC, node->name, node->handle, buf, offset, n_bytes, read_only_mode, - NULL, NULL, NULL); + NULL, NULL); if (err != DB_SUCCESS) { break; @@ -3758,12 +3745,31 @@ fil_ibd_create( success = true; } #endif /* HAVE_POSIX_FALLOCATE */ - if (!success) - { + + if (!success) { success = os_file_set_size( path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode); } + /* Note: We are actually punching a hole, previous contents will + be lost after this call, if it succeeds. In this case the file + should be full of NULs. 
*/ + + bool punch_hole = os_is_sparse_file_supported(path, file); + + if (punch_hole) { + + dberr_t punch_err; + + punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE); + + if (punch_err != DB_SUCCESS) { + punch_hole = false; + } + } + + ulint block_size = os_file_get_block_size(file, path); + if (!success) { os_file_close(file); os_file_delete(innodb_data_file_key, path); @@ -3866,7 +3872,13 @@ fil_ibd_create( space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE, crypt_data, true); - if (!fil_node_create_low(path, size, space, false, true)) { + fil_node_t* node = NULL; + + if (space) { + node = fil_node_create_low(path, size, space, false, true); + } + + if (!space || !node) { if (crypt_data) { free(crypt_data); } @@ -3883,6 +3895,9 @@ fil_ibd_create( fil_name_write(space, 0, file, &mtr); mtr.commit(); + node->block_size = block_size; + space->punch_hole = punch_hole; + err = DB_SUCCESS; } @@ -5038,8 +5053,6 @@ fil_report_invalid_page_access( aligned @param[in] message message for aio handler if non-sync aio used, else ignored -@param[in] write_size actual payload size when written - to avoid extra punch holes in compression @return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED if we are trying to do i/o on a tablespace which does not exist */ dberr_t @@ -5051,8 +5064,7 @@ fil_io( ulint byte_offset, ulint len, void* buf, - void* message, - ulint* write_size) + void* message) { os_offset_t offset; IORequest req_type(type); @@ -5285,7 +5297,7 @@ fil_io( const char* name = node->name == NULL ? space->name : node->name; - req_type.block_size(node->block_size); + req_type.set_fil_node(node); /* Queue the aio request */ dberr_t err = os_aio( @@ -5293,7 +5305,7 @@ fil_io( mode, name, node->handle, buf, offset, len, space->purpose != FIL_TYPE_TEMPORARY && srv_read_only_mode, - node, message, write_size); + node, message); /* We an try to recover the page from the double write buffer if the decompression fails or the page is corrupt. 
*/ @@ -6973,3 +6985,26 @@ fil_system_exit(void) ut_ad(mutex_own(&fil_system->mutex)); mutex_exit(&fil_system->mutex); } + +/** +Get should we punch hole to tablespace. +@param[in] node File node +@return true, if punch hole should be tried, false if not. */ +bool +fil_node_should_punch_hole( + const fil_node_t* node) +{ + return (node->space->punch_hole); +} + +/** +Set punch hole to tablespace to given value. +@param[in] node File node +@param[in] val value to be set. */ +void +fil_space_set_punch_hole( + fil_node_t* node, + bool val) +{ + node->space->punch_hole = val; +} diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index a5fd7788af2..271abc3e86d 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -927,6 +927,7 @@ static ibool innodb_have_lz4=IF_LZ4(1, 0); static ibool innodb_have_lzma=IF_LZMA(1, 0); static ibool innodb_have_bzip2=IF_BZIP2(1, 0); static ibool innodb_have_snappy=IF_SNAPPY(1, 0); +static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0); static int @@ -1134,20 +1135,6 @@ static SHOW_VAR innodb_status_variables[]= { /* Status variables for page compression */ {"page_compression_saved", (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG}, - {"page_compression_trim_sect512", - (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG}, - {"page_compression_trim_sect1024", - (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG}, - {"page_compression_trim_sect2048", - (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG}, - {"page_compression_trim_sect4096", - (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, - {"page_compression_trim_sect8192", - (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG}, - {"page_compression_trim_sect16384", - (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG}, - 
{"page_compression_trim_sect32768", - (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG}, {"num_index_pages_written", (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, {"num_non_index_pages_written", @@ -1156,8 +1143,6 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, {"num_page_compressed_trim_op", (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG}, - {"num_page_compressed_trim_op_saved", - (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, {"num_pages_page_decompressed", (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, {"num_pages_page_compression_error", @@ -1176,6 +1161,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &innodb_have_bzip2, SHOW_BOOL}, {"have_snappy", (char*) &innodb_have_snappy, SHOW_BOOL}, + {"have_punch_hole", + (char*) &innodb_have_punch_hole, SHOW_BOOL}, /* Defragmentation */ {"defragment_compression_failures", @@ -3830,6 +3817,10 @@ static const char* deprecated_file_format_check static const char* deprecated_file_format_max = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_max"); +/** Deprecation message about innodb_use_trim */ +static const char* deprecated_use_trim + = DEPRECATED_FORMAT_PARAMETER("innodb_use_trim"); + /** Update log_checksum_algorithm_ptr with a pointer to the function corresponding to whether checksums are enabled. @param[in] check whether redo log block checksums are enabled */ @@ -20660,6 +20651,25 @@ wsrep_fake_trx_id( #endif /* WITH_WSREP */ +/** Update the innodb_use_trim parameter. 
+@param[in] thd thread handle +@param[in] var system variable +@param[out] var_ptr current value +@param[in] save immediate result from check function */ +static +void +innodb_use_trim_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) +{ + srv_use_trim = *static_cast(save); + + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, deprecated_use_trim); +} + /* plugin options */ static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, @@ -21761,8 +21771,8 @@ static MYSQL_SYSVAR_BOOL(force_primary_key, static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, - "Use trim. Default FALSE.", - NULL, NULL, FALSE); + "Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)", + NULL, innodb_use_trim_update, TRUE); static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 }; static TYPELIB page_compression_algorithms_typelib= diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index 316d90bec34..972d99553b9 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -160,6 +160,9 @@ enum dberr_t { placed on the base column of stored column */ + DB_IO_NO_PUNCH_HOLE, /*!< Punch hole not supported by + file system. 
*/ + /* The following are partial failure codes */ DB_FAIL = 1000, DB_OVERFLOW, diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 7428ff2c936..bd6067fbbee 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -185,6 +185,10 @@ struct fil_space_t { @param[in] n_reserved number of reserved extents */ void release_free_extents(ulint n_reserved); + /** True if file system storing this tablespace supports + punch hole */ + bool punch_hole; + ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ }; @@ -229,12 +233,12 @@ struct fil_node_t { /** link to the fil_system->LRU list (keeping track of open files) */ UT_LIST_NODE_T(fil_node_t) LRU; - /** block size to use for punching holes */ - ulint block_size; - /** whether this file could use atomic write (data file) */ bool atomic_write; + /** Filesystem block size */ + ulint block_size; + /** FIL_NODE_MAGIC_N */ ulint magic_n; }; @@ -1129,11 +1133,6 @@ fil_space_get_n_reserved_extents( aligned @param[in] message message for aio handler if non-sync aio used, else ignored -@param[in,out] write_size Actual write size initialized - after fist successfull trim - operation for this page and if - nitialized we do not trim again if - Actual page @return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED if we are trying to do i/o on a tablespace which does not exist */ @@ -1146,8 +1145,7 @@ fil_io( ulint byte_offset, ulint len, void* buf, - void* message, - ulint* write_size); + void* message); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. 
The aio array of pending requests is divided diff --git a/storage/innobase/include/os0api.h b/storage/innobase/include/os0api.h new file mode 100644 index 00000000000..ea2a113bdec --- /dev/null +++ b/storage/innobase/include/os0api.h @@ -0,0 +1,75 @@ +/*********************************************************************** + +Copyright (c) 2017, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +***********************************************************************/ + +/**************************************************//** +@file os0api.h +The interface to the helper functions. +These functions are used in os0file.h, where +including the full headers is not feasible, and are +implemented in buf0buf.cc and fil0fil.cc. +*******************************************************/ + +#ifndef OS_API_H +#define OS_API_H 1 + +/** Page control block */ +struct buf_page_t; + +/** File Node */ +struct fil_node_t; + +/** +Should we punch hole to deallocate unused portion of the page. +@param[in] bpage Page control block +@return true if punch hole should be used, false if not */ +bool +buf_page_should_punch_hole( + const buf_page_t* bpage) + MY_ATTRIBUTE((warn_unused_result)); + +/** +Calculate the length of trim (punch_hole) operation. +@param[in] bpage Page control block +@param[in] write_length Write length +@return length of the trim or zero. 
*/ +ulint +buf_page_get_trim_length( + const buf_page_t* bpage, + ulint write_length) + MY_ATTRIBUTE((warn_unused_result)); + +/** +Check whether punch hole should be tried for the tablespace of a file node. +@param[in] node File node +@return true, if punch hole should be tried, false if not. */ +bool +fil_node_should_punch_hole( + const fil_node_t* node) + MY_ATTRIBUTE((warn_unused_result)); + +/** +Set the punch hole flag of the file node's tablespace to the given value. +@param[in] node File node +@param[in] val value to be set. */ +void +fil_space_set_punch_hole( + fil_node_t* node, + bool val); + +#endif /* OS_API_H */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 57ee015dfdd..6a97ff3aa53 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -36,7 +36,8 @@ Created 10/21/1995 Heikki Tuuri #ifndef os0file_h #define os0file_h -#include "univ.i" +#include "page0size.h" +#include "os0api.h" #ifndef _WIN32 #include @@ -46,8 +47,10 @@ Created 10/21/1995 Heikki Tuuri /** File node of a tablespace or the log data space */ struct fil_node_t; +struct fil_space_t; extern bool os_has_said_disk_full; +extern my_bool srv_use_trim; /** Number of pending read operations */ extern ulint os_n_pending_reads; @@ -177,6 +180,8 @@ static const ulint OS_FILE_ERROR_MAX = 200; #define IORequestLogRead IORequest(IORequest::LOG | IORequest::READ) #define IORequestLogWrite IORequest(IORequest::LOG | IORequest::WRITE) + + /** The IO Context that is passed down to the low level IO code */ class IORequest { @@ -211,12 +216,16 @@ public: /** Ignore failed reads of non-existent pages */ IGNORE_MISSING = 128, + + /** Use punch hole if available */ + PUNCH_HOLE = 256, }; /** Default constructor */ IORequest() : - m_block_size(UNIV_SECTOR_SIZE), + m_bpage(NULL), + m_fil_node(NULL), m_type(READ) { /* No op */ @@ -227,9 +236,32 @@ public: ORed from the above enum */ explicit IORequest(ulint type) : - 
m_block_size(UNIV_SECTOR_SIZE), + m_bpage(NULL), + m_fil_node(NULL), 
m_type(static_cast(type)) { + if (!is_punch_hole_supported() || !srv_use_trim) { + clear_punch_hole(); + } + } + + /** + @param[in] type Request type, can be a value that is + ORed from the above enum + @param[in] bpage Page to be written */ + IORequest(ulint type, buf_page_t* bpage) + : + m_bpage(bpage), + m_fil_node(NULL), + m_type(static_cast(type)) + { + if (bpage && buf_page_should_punch_hole(bpage)) { + set_punch_hole(); + } + + if (!is_punch_hole_supported() || !srv_use_trim) { + clear_punch_hole(); + } } /** Destructor */ @@ -270,6 +302,12 @@ public: return((m_type & DO_NOT_WAKE) == 0); } + /** Clear the punch hole flag */ + void clear_punch_hole() + { + m_type &= ~PUNCH_HOLE; + } + /** @return true if partial read warning disabled */ bool is_partial_io_warning_disabled() const MY_ATTRIBUTE((warn_unused_result)) @@ -291,6 +329,13 @@ public: return(ignore_missing(m_type)); } + /** @return true if punch hole should be used */ + bool punch_hole() const + MY_ATTRIBUTE((warn_unused_result)) + { + return((m_type & PUNCH_HOLE) == PUNCH_HOLE); + } + /** @return true if the read should be validated */ bool validate() const MY_ATTRIBUTE((warn_unused_result)) @@ -298,24 +343,30 @@ public: return(is_read() ^ is_write()); } + /** Set the punch hole flag */ + void set_punch_hole() + { + if (is_punch_hole_supported() && srv_use_trim) { + m_type |= PUNCH_HOLE; + } + } + /** Clear the do not wake flag */ void clear_do_not_wake() { m_type &= ~DO_NOT_WAKE; } - /** @return the block size to use for IO */ - ulint block_size() const - MY_ATTRIBUTE((warn_unused_result)) + /** Set the pointer to file node for IO + @param[in] node File node */ + void set_fil_node(fil_node_t* node) { - return(m_block_size); - } + if (!srv_use_trim || + (node && !fil_node_should_punch_hole(node))) { + clear_punch_hole(); + } - /** Set the block size for IO - @param[in] block_size Block size to set */ - void block_size(ulint block_size) - { - m_block_size = static_cast(block_size); + m_fil_node = 
node; } /** Compare two requests @@ -338,9 +389,59 @@ public: return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER); } + /** @return true if punch hole is supported */ + static bool is_punch_hole_supported() + { + + /* In this debugging mode, we act as if punch hole is supported, + and then skip any calls to actually punch a hole here. + In this way, Transparent Page Compression is still being tested. */ + DBUG_EXECUTE_IF("ignore_punch_hole", + return(true); + ); + +#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) + return(true); +#else + return(false); +#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || _WIN32 */ + } + + ulint get_trim_length(ulint write_length) const + { + return (m_bpage ? + buf_page_get_trim_length(m_bpage, write_length) + : 0); + } + + bool should_punch_hole() const { + return (m_fil_node ? + fil_node_should_punch_hole(m_fil_node) + : false); + } + + void space_no_punch_hole() const { + if (m_fil_node) { + fil_space_set_punch_hole(m_fil_node, false); + } + } + + /** Punch a hole in the file if it was a write + @param[in] fh Open file handle + @param[in] len Compressed buffer length for write + @return DB_SUCCESS or error code */ + + dberr_t punch_hole( + os_file_t fh, + ulint offset, + ulint len); + private: - /* File system best block size */ - uint32_t m_block_size; + /** Page to be written on write operation. */ + buf_page_t* m_bpage; + + /** File node */ + fil_node_t* m_fil_node; /** Request type bit flags */ uint16_t m_type; @@ -706,10 +807,10 @@ The wrapper functions have the prefix of "innodb_". 
*/ # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) -# define os_aio(type, mode, name, file, buf, offset, \ - n, read_only, message1, message2, wsize) \ - pfs_os_aio_func(type, mode, name, file, buf, offset, \ - n, read_only, message1, message2, wsize, \ +# define os_aio(type, mode, name, file, buf, offset, \ + n, read_only, message1, message2) \ + pfs_os_aio_func(type, mode, name, file, buf, offset, \ + n, read_only, message1, message2, \ __FILE__, __LINE__) # define os_file_read(type, file, buf, offset, n) \ @@ -721,7 +822,7 @@ The wrapper functions have the prefix of "innodb_". */ # define os_file_write(type, name, file, buf, offset, n) \ pfs_os_file_write_func(type, name, file, buf, offset, \ - n, __FILE__, __LINE__) + n,__FILE__, __LINE__) # define os_file_flush(file) \ pfs_os_file_flush_func(file, __FILE__, __LINE__) @@ -926,7 +1027,6 @@ pfs_os_aio_func( bool read_only, fil_node_t* m1, void* m2, - ulint* wsize, const char* src_file, ulint src_line); @@ -1051,9 +1151,9 @@ to original un-instrumented file I/O APIs */ # define os_file_close(file) os_file_close_func(file) # define os_aio(type, mode, name, file, buf, offset, \ - n, read_only, message1, message2, wsize) \ + n, read_only, message1, message2) \ os_aio_func(type, mode, name, file, buf, offset, \ - n, read_only, message1, message2, wsize) + n, read_only, message1, message2) # define os_file_read(type, file, buf, offset, n) \ os_file_read_func(type, file, buf, offset, n) @@ -1061,7 +1161,7 @@ to original un-instrumented file I/O APIs */ # define os_file_read_no_error_handling(type, file, buf, offset, n, o) \ os_file_read_no_error_handling_func(type, file, buf, offset, n, o) -# define os_file_write(type, name, file, buf, offset, n) \ +# define os_file_write(type, name, file, buf, offset, n) \ os_file_write_func(type, name, file, buf, offset, n) # define os_file_flush(file) os_file_flush_func(file) @@ -1324,8 +1424,7 @@ os_aio_func( ulint n, bool read_only, fil_node_t* m1, - 
void* m2, - ulint* wsize); + void* m2); /** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ @@ -1427,6 +1526,48 @@ innobase_mysql_tmpfile( void os_file_set_umask(ulint umask); +/** Check if the file system supports sparse files. + +Warning: On POSIX systems we try and punch a hole from offset 0 to +the system configured page size. This should only be called on an empty +file. + +Note: On Windows we use the name and on Unices we use the file handle. + +@param[in] name File name +@param[in] fh File handle for the file - if opened +@return true if the file system supports sparse files */ +bool +os_is_sparse_file_supported( + const char* path, + os_file_t fh) + MY_ATTRIBUTE((warn_unused_result)); + +/** Free storage space associated with a section of the file. +@param[in] fh Open file handle +@param[in] off Starting offset (SEEK_SET) +@param[in] len Size of the hole +@return DB_SUCCESS or error code */ +dberr_t +os_file_punch_hole( + IORequest& type, + os_file_t fh, + os_offset_t off, + os_offset_t len) + MY_ATTRIBUTE((warn_unused_result)); + +/** Free storage space associated with a section of the file. +@param[in] fh Open file handle +@param[in] off Starting offset (SEEK_SET) +@param[in] len Size of the hole +@return DB_SUCCESS or error code */ +dberr_t +os_file_punch_hole( + os_file_t fh, + os_offset_t off, + os_offset_t len) + MY_ATTRIBUTE((warn_unused_result)); + /** Normalizes a directory path for the current OS: On Windows, we convert '/' to '\', else we convert '\' to '/'. @param[in,out] str A null-terminated directory and file path */ @@ -1454,6 +1595,16 @@ is_absolute_path( return(false); } +/***********************************************************************//** +Try to get number of bytes per sector from file system. 
+@return file block size */ +UNIV_INTERN +ulint +os_file_get_block_size( +/*===================*/ + os_file_t file, /*!< in: handle to a file */ + const char* name); /*!< in: file name */ + #ifndef UNIV_NONINL #include "os0file.ic" #endif /* UNIV_NONINL */ diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic index 3e78b87a177..5c7c4d45ca6 100644 --- a/storage/innobase/include/os0file.ic +++ b/storage/innobase/include/os0file.ic @@ -219,11 +219,6 @@ an asynchronous i/o operation. @param[in,out] m2 message for the AIO handler (can be used to identify a completed AIO operation); ignored if mode is OS_AIO_SYNC -@param[in,out] write_size Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size @param[in] src_file file name where func invoked @param[in] src_line line where the func invoked @return DB_SUCCESS if request was queued successfully, FALSE if fail */ @@ -240,7 +235,6 @@ pfs_os_aio_func( bool read_only, fil_node_t* m1, void* m2, - ulint* write_size, const char* src_file, ulint src_line) { @@ -256,7 +250,7 @@ pfs_os_aio_func( src_file, src_line); dberr_t result = os_aio_func( - type, mode, name, file, buf, offset, n, read_only, m1, m2, write_size); + type, mode, name, file, buf, offset, n, read_only, m1, m2); register_pfs_file_io_end(locker, n); diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h index ab917e1ff05..ca173db9b6d 100644 --- a/storage/innobase/include/page0size.h +++ b/storage/innobase/include/page0size.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -139,9 +140,7 @@ public: @param[in] src page size object whose values to fetch */ inline void copy_from(const page_size_t& src) { - m_physical = src.physical(); - m_logical = src.logical(); - m_is_compressed = src.is_compressed(); + *this = src; } /** Check if a given page_size_t object is equal to the current one. @@ -156,9 +155,6 @@ public: private: - /* Disable implicit copying. */ - void operator=(const page_size_t&); - /* For non compressed tablespaces, physical page size is equal to the logical page size and the data is stored in buf_page_t::frame (and is also always equal to univ_page_size (--innodb-page-size=)). diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index e4e1394c2d3..5c19e735806 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -343,16 +343,8 @@ enum monitor_id_t { MONITOR_PAD_DECREMENTS, /* New monitor variables for page compression */ MONITOR_OVLD_PAGE_COMPRESS_SAVED, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768, MONITOR_OVLD_PAGES_PAGE_COMPRESSED, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP, - MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 227bcfb7781..ca81ad46b8b 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -114,20 +114,6 @@ struct srv_stats_t { /** Number of bytes saved by page compression */ ulint_ctr_64_t page_compression_saved; - /** Number 
of 512Byte TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect512; - /** Number of 1K TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect1024; - /** Number of 2K TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect2048; - /** Number of 4K TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect4096; - /** Number of 8K TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect8192; - /** Number of 16K TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect16384; - /** Number of 32K TRIM by page compression */ - ulint_ctr_64_t page_compression_trim_sect32768; /* Number of index pages written */ ulint_ctr_64_t index_pages_written; /* Number of non index pages written */ @@ -136,8 +122,6 @@ struct srv_stats_t { ulint_ctr_64_t pages_page_compressed; /* Number of TRIM operations induced by page compression */ ulint_ctr_64_t page_compressed_trim_op; - /* Number of TRIM operations saved by using actual write size knowledge */ - ulint_ctr_64_t page_compressed_trim_op_saved; /* Number of pages decompressed with page compression */ ulint_ctr_64_t pages_page_decompressed; /* Number of page compression errors */ @@ -1059,20 +1043,6 @@ struct export_var_t{ int64_t innodb_page_compression_saved;/*!< Number of bytes saved by page compression */ - int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM - by page compression */ - int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM - by page compression */ - int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM - by page compression */ - int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM - by page compression */ - int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM - by page compression */ - int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM - by page compression */ - int64_t innodb_page_compression_trim_sect32768;/*!< Number of 
32K TRIM - by page compression */ int64_t innodb_index_pages_written; /*!< Number of index pages written */ int64_t innodb_non_index_pages_written; /*!< Number of non index pages @@ -1081,8 +1051,6 @@ struct export_var_t{ compressed by page compression */ int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations induced by page compression */ - int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations - saved by page compression */ int64_t innodb_pages_page_decompressed;/*!< Number of pages decompressed by page compression */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 35ea9fd51be..908fb60e956 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -366,6 +366,12 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t; #define IF_SNAPPY(A,B) B #endif +#if defined (HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) +#define IF_PUNCH_HOLE(A,B) A +#else +#define IF_PUNCH_HOLE(A,B) B +#endif + /** The universal page size of the database */ #define UNIV_PAGE_SIZE ((ulint) srv_page_size) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index cf7825bd542..39afc4e9680 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1019,7 +1019,7 @@ log_group_file_header_flush( page_id_t(group->space_id, page_no), univ_page_size, (ulint) (dest_offset % univ_page_size.physical()), - OS_FILE_LOG_BLOCK_SIZE, buf, group, NULL); + OS_FILE_LOG_BLOCK_SIZE, buf, group); srv_stats.os_log_pending_writes.dec(); } @@ -1144,7 +1144,7 @@ loop: page_id_t(group->space_id, page_no), univ_page_size, (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group, NULL); + group); srv_stats.os_log_pending_writes.dec(); @@ -1664,7 +1664,7 @@ log_group_checkpoint( (log_sys->next_checkpoint_no & 1) ? 
LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1, OS_FILE_LOG_BLOCK_SIZE, - buf, (byte*) group + 1, NULL); + buf, (byte*) group + 1); ut_ad(((ulint) group & 0x1UL) == 0); } @@ -1686,7 +1686,7 @@ log_group_header_read( fil_io(IORequestLogRead, true, page_id_t(group->space_id, header / univ_page_size.physical()), univ_page_size, header % univ_page_size.physical(), - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, NULL); + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); } /** Write checkpoint info to the log header and invoke log_mutex_exit(). @@ -2038,7 +2038,7 @@ loop: page_id_t(group->space_id, page_no), univ_page_size, (ulint) (source_offset % univ_page_size.physical()), - len, buf, NULL, NULL); + len, buf, NULL); #ifdef DEBUG_CRYPT fprintf(stderr, "BEFORE DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx offset %lu\n", diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index e5aab543f5d..ce5b37565af 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -869,7 +869,7 @@ recv_log_format_0_recover(lsn_t lsn) univ_page_size, (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1)) % univ_page_size.physical()), - OS_FILE_LOG_BLOCK_SIZE, buf, NULL, NULL); + OS_FILE_LOG_BLOCK_SIZE, buf, NULL); if (log_block_calc_checksum_format_0(buf) != log_block_get_checksum(buf)) { diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 71a9a856571..f305de38e01 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -44,6 +44,11 @@ Created 10/21/1995 Heikki Tuuri #include "os0file.ic" #endif +#ifdef UNIV_LINUX +#include +#include +#endif + #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" @@ -63,17 +68,23 @@ Created 10/21/1995 Heikki Tuuri #include #endif /* LINUX_NATIVE_AIO */ -#ifdef HAVE_LZ4 -#include -#endif - -#include +#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE +# include +# include +#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */ #ifdef 
UNIV_DEBUG /** Set when InnoDB has invoked exit(). */ bool innodb_calling_exit; #endif /* UNIV_DEBUG */ +#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H) +# include +# ifndef DFS_IOCTL_ATOMIC_WRITE_SET +# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) +# endif +#endif + #if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) #include #endif @@ -82,12 +93,8 @@ bool innodb_calling_exit; #include #endif -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif - -#ifdef HAVE_SNAPPY -#include "snappy-c.h" +#ifdef _WIN32 +#include #endif /** Insert buffer segment id */ @@ -216,8 +223,6 @@ struct Slot { /** buffer used in i/o */ byte* buf; - ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */ - ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */ /** Buffer pointer used for actual IO. We advance this when partial IO is required and not buf */ @@ -286,7 +291,6 @@ struct Slot { /** Length of the block before it was compressed */ uint32 original_len; - ulint* write_size; }; /** The asynchronous i/o array structure */ @@ -328,8 +332,7 @@ public: const char* name, void* buf, os_offset_t offset, - ulint len, - ulint* write_size) + ulint len) MY_ATTRIBUTE((warn_unused_result)); /** @return number of reserved slots */ @@ -759,6 +762,107 @@ os_aio_simulated_handler( void** m2, IORequest* type); +#ifdef _WIN32 +static HANDLE win_get_syncio_event(); +#endif + +#ifdef _WIN32 +/** + Wrapper around Windows DeviceIoControl() function. + + Works synchronously, also in case for handle opened + for async access (i.e with FILE_FLAG_OVERLAPPED). + + Accepts the same parameters as DeviceIoControl(),except + last parameter (OVERLAPPED). 
+*/ +static +BOOL +os_win32_device_io_control( + HANDLE handle, + DWORD code, + LPVOID inbuf, + DWORD inbuf_size, + LPVOID outbuf, + DWORD outbuf_size, + LPDWORD bytes_returned +) +{ + OVERLAPPED overlapped = { 0 }; + overlapped.hEvent = win_get_syncio_event(); + BOOL result = DeviceIoControl(handle, code, inbuf, inbuf_size, outbuf, + outbuf_size, bytes_returned, &overlapped); + + if (!result && (GetLastError() == ERROR_IO_PENDING)) { + /* Wait for async io to complete */ + result = GetOverlappedResult(handle, &overlapped, bytes_returned, TRUE); + } + + return result; +} + +#endif + +/***********************************************************************//** +Try to get number of bytes per sector from file system. +@return file block size */ +UNIV_INTERN +ulint +os_file_get_block_size( +/*===================*/ + os_file_t file, /*!< in: handle to a file */ + const char* name) /*!< in: file name */ +{ + ulint fblock_size = 512; + +#if defined(UNIV_LINUX) + struct stat local_stat; + int err; + + err = fstat((int)file, &local_stat); + + if (err != 0) { + os_file_handle_error_no_exit(name, "fstat()", FALSE); + } else { + fblock_size = local_stat.st_blksize; + } +#endif /* UNIV_LINUX */ +#ifdef _WIN32 + DWORD outsize; + STORAGE_PROPERTY_QUERY storageQuery; + memset(&storageQuery, 0, sizeof(storageQuery)); + storageQuery.PropertyId = StorageAccessAlignmentProperty; + storageQuery.QueryType = PropertyStandardQuery; + STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR diskAlignment; + + BOOL result = os_win32_device_io_control(file, + IOCTL_STORAGE_QUERY_PROPERTY, + &storageQuery, + sizeof(STORAGE_PROPERTY_QUERY), + &diskAlignment, + sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR), + &outsize); + + if (!result) { + os_file_handle_error_no_exit(name, "DeviceIoControl()", FALSE); + fblock_size = 0; + } + + fblock_size = diskAlignment.BytesPerPhysicalSector; +#endif /* _WIN32 */ + + /* Currently we support file block size up to 4Kb */ + if (fblock_size > 4096 || fblock_size < 512) { + if 
(fblock_size < 512) { + fblock_size = 512; + } else { + fblock_size = 4096; + } + } + + return fblock_size; +} + #ifdef WIN_ASYNC_IO /** This function is only used in Windows asynchronous i/o. Waits for an aio operation to complete. This function is used to wait the @@ -1443,6 +1547,48 @@ SyncFileIO::execute(const IORequest& request) return(n_bytes); } +/** Free storage space associated with a section of the file. +@param[in] fh Open file handle +@param[in] off Starting offset (SEEK_SET) +@param[in] len Size of the hole +@return DB_SUCCESS or error code */ +static +dberr_t +os_file_punch_hole_posix( + os_file_t fh, + os_offset_t off, + os_offset_t len) +{ + +#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE + const int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + + int ret = fallocate(fh, mode, off, len); + + if (ret == 0) { + return(DB_SUCCESS); + } + + if (errno == ENOTSUP) { + return(DB_IO_NO_PUNCH_HOLE); + } + + ib::warn() + << "fallocate(" + <<", FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, " + << off << ", " << len << ") returned errno: " + << errno; + + return(DB_IO_ERROR); + +#elif defined(UNIV_SOLARIS) + + // Use F_FREESP + +#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */ + + return(DB_IO_NO_PUNCH_HOLE); +} #if defined(LINUX_NATIVE_AIO) @@ -1734,7 +1880,18 @@ LinuxAIOHandler::collect() /* We have not overstepped to next segment. */ ut_a(slot->pos < end_pos); - slot->err = DB_SUCCESS; + /* Deallocate unused blocks from file system. + This is never done to page 0 or to log files.*/ + if (slot->offset > 0 + && !slot->skip_punch_hole + && !slot->type.is_log() + && slot->type.is_write() + && slot->type.punch_hole()) { + + slot->err = AIOHandler::io_complete(slot); + } else { + slot->err = DB_SUCCESS; + } /* Mark this request as completed. The error handling will be done in the calling function.
*/ @@ -3353,6 +3510,76 @@ struct WinIoInit /* Ensures proper initialization and shutdown */ static WinIoInit win_io_init; +/** Check if the file system supports sparse files. +@param[in] name File name +@return true if the file system supports sparse files */ +static +bool +os_is_sparse_file_supported_win32(const char* filename) +{ + char volname[MAX_PATH]; + BOOL result = GetVolumePathName(filename, volname, MAX_PATH); + + if (!result) { + + ib::error() + << "os_is_sparse_file_supported: " + << "Failed to get the volume path name for: " + << filename + << "- OS error number " << GetLastError(); + + return(false); + } + + DWORD flags; + + result = GetVolumeInformation( + volname, NULL, MAX_PATH, NULL, NULL, + &flags, NULL, MAX_PATH); + + + if (!result) { + ib::error() + << "os_is_sparse_file_supported: " + << "Failed to get the volume info for: " + << volname + << "- OS error number " << GetLastError(); + + return(false); + } + + return(flags & FILE_SUPPORTS_SPARSE_FILES) ? true : false; +} + +/** Free storage space associated with a section of the file. +@param[in] fh Open file handle +@param[in] page_size Tablespace page size +@param[in] block_size File system block size +@param[in] off Starting offset (SEEK_SET) +@param[in] len Size of the hole +@return 0 on success or errno */ +static +dberr_t +os_file_punch_hole_win32( + os_file_t fh, + os_offset_t off, + os_offset_t len) +{ + FILE_ZERO_DATA_INFORMATION punch; + + punch.FileOffset.QuadPart = off; + punch.BeyondFinalZero.QuadPart = off + len; + + /* If lpOverlapped is NULL, lpBytesReturned cannot be NULL, + therefore we pass a dummy parameter. */ + DWORD temp; + BOOL success = os_win32_device_io_control( + fh, FSCTL_SET_ZERO_DATA, &punch, sizeof(punch), + NULL, 0, &temp); + + return(success ? DB_SUCCESS: DB_IO_NO_PUNCH_HOLE); +} + /** Check the existence and type of the given file. 
@param[in] path path name of file @param[out] exists true if the file exists @@ -3661,9 +3888,9 @@ os_file_create_simple_func( /* This is a best effort use case, if it fails then we will find out when we try and punch the hole. */ - DeviceIoControl( + os_win32_device_io_control( file, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, - &temp, NULL); + &temp); } } while (retry); @@ -4020,9 +4247,9 @@ os_file_create_func( /* This is a best effort use case, if it fails then we will find out when we try and punch the hole. */ - DeviceIoControl( + os_win32_device_io_control( file, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, - &temp, NULL); + &temp); } } while (retry); @@ -4459,28 +4686,6 @@ os_file_get_status_win32( } stat_info->block_size = bytesPerSector * sectorsPerCluster; - - /* On Windows the block size is not used as the allocation - unit for sparse files. The underlying infra-structure for - sparse files is based on NTFS compression. The punch hole - is done on a "compression unit". This compression unit - is based on the cluster size. You cannot punch a hole if - the cluster size >= 8K. For smaller sizes the table is - as follows: - - Cluster Size Compression Unit - 512 Bytes 8 KB - 1 KB 16 KB - 2 KB 32 KB - 4 KB 64 KB - - Default NTFS cluster size is 4K, compression unit size of 64K. - Therefore unless the user has created the file system with - a smaller cluster size and used larger page sizes there is - little benefit from compression out of the box. */ - - stat_info->block_size = (stat_info->block_size <= 4096) - ? 
stat_info->block_size * 16 : ULINT_UNDEFINED; } else { stat_info->type = OS_FILE_TYPE_UNKNOWN; } @@ -4615,7 +4820,18 @@ os_file_io( } else if ((ulint) n_bytes + bytes_returned == n) { bytes_returned += n_bytes; - *err = DB_SUCCESS; + + if (offset > 0 + && !type.is_log() + && type.is_write() + && type.punch_hole()) { + *err = type.punch_hole(file, + static_cast(offset), + n); + + } else { + *err = DB_SUCCESS; + } return(original_n); } @@ -4668,7 +4884,7 @@ ssize_t os_file_pwrite( IORequest& type, os_file_t file, - const void* buf, + const byte* buf, ulint n, os_offset_t offset, dberr_t* err) @@ -4680,7 +4896,7 @@ os_file_pwrite( (void) my_atomic_addlint(&os_n_pending_writes, 1); MONITOR_ATOMIC_INC(MONITOR_OS_PENDING_WRITES); - ssize_t n_bytes = os_file_io(type, file, const_cast(buf), + ssize_t n_bytes = os_file_io(type, file, const_cast(buf), n, offset, err); (void) my_atomic_addlint(&os_n_pending_writes, -1); @@ -4696,8 +4912,9 @@ os_file_pwrite( @param[in] offset file offset from the start where to read @param[in] n number of bytes to read, starting from offset @return DB_SUCCESS if request was successful, false if fail */ +static MY_ATTRIBUTE((warn_unused_result)) dberr_t -os_file_write_func( +os_file_write_page( IORequest& type, const char* name, os_file_t file, @@ -4711,7 +4928,7 @@ os_file_write_func( ut_ad(type.validate()); ut_ad(n > 0); - ssize_t n_bytes = os_file_pwrite(type, file, buf, n, offset, &err); + ssize_t n_bytes = os_file_pwrite(type, file, (byte*)buf, n, offset, &err); if ((ulint) n_bytes != n && !os_has_said_disk_full) { @@ -5195,6 +5412,31 @@ os_file_read_no_error_handling_func( return(os_file_read_page(type, file, buf, offset, n, o, false)); } +/** NOTE! Use the corresponding macro os_file_write(), not directly +Requests a synchronous write operation. 
+@param[in] type IO flags +@param[in] file handle to an open file +@param[out] buf buffer from which to write +@param[in] offset file offset from the start where to read +@param[in] n number of bytes to read, starting from offset +@return DB_SUCCESS if request was successful, false if fail */ +dberr_t +os_file_write_func( + IORequest& type, + const char* name, + os_file_t file, + const void* buf, + os_offset_t offset, + ulint n) +{ + ut_ad(type.validate()); + ut_ad(type.is_write()); + + const byte* ptr = reinterpret_cast(buf); + + return(os_file_write_page(type, name, file, ptr, offset, n)); +} + /** Check the existence and type of the given file. @param[in] path path name of file @param[out] exists true if the file exists @@ -5213,6 +5455,110 @@ os_file_status( #endif /* _WIN32 */ } +/** Free storage space associated with a section of the file. +@param[in] fh Open file handle +@param[in] off Starting offset (SEEK_SET) +@param[in] len Size of the hole +@return DB_SUCCESS or error code */ +dberr_t +os_file_punch_hole( + os_file_t fh, + os_offset_t off, + os_offset_t len) +{ + dberr_t err; + +#ifdef _WIN32 + err = os_file_punch_hole_win32(fh, off, len); +#else + err = os_file_punch_hole_posix(fh, off, len); +#endif /* _WIN32 */ + + return (err); +} + +/** Free storage space associated with a section of the file. +@param[in] fh Open file handle +@param[in] off Starting offset (SEEK_SET) +@param[in] len Size of the hole +@return DB_SUCCESS or error code */ +dberr_t +IORequest::punch_hole( + os_file_t fh, + os_offset_t off, + os_offset_t len) +{ + /* In this debugging mode, we act as if punch hole is supported, + and then skip any calls to actually punch a hole here. + In this way, Transparent Page Compression is still being tested. 
*/ + DBUG_EXECUTE_IF("ignore_punch_hole", + return(DB_SUCCESS); + ); + + ulint trim_len = get_trim_length(len); + + if (trim_len == 0) { + return(DB_SUCCESS); + } + + off += len; + + /* Check does file system support punching holes for this + tablespace. */ + if (!should_punch_hole() || !srv_use_trim) { + return DB_IO_NO_PUNCH_HOLE; + } + + dberr_t err = os_file_punch_hole(fh, off, len); + + if (err == DB_SUCCESS) { + srv_stats.page_compressed_trim_op.inc(); + } else { + /* If punch hole is not supported, + set space so that it is not used. */ + if (err == DB_IO_NO_PUNCH_HOLE) { + space_no_punch_hole(); + err = DB_SUCCESS; + } + } + + return (err); +} + +/** Check if the file system supports sparse files. + +Warning: On POSIX systems we try and punch a hole from offset 0 to +the system configured page size. This should only be called on an empty +file. + +Note: On Windows we use the name and on Unices we use the file handle. + +@param[in] name File name +@param[in] fh File handle for the file - if opened +@return true if the file system supports sparse files */ +bool +os_is_sparse_file_supported(const char* path, os_file_t fh) +{ + /* In this debugging mode, we act as if punch hole is supported, + then we skip any calls to actually punch a hole. In this way, + Transparent Page Compression is still being tested. */ + DBUG_EXECUTE_IF("ignore_punch_hole", + return(true); + ); + +#ifdef _WIN32 + return(os_is_sparse_file_supported_win32(path)); +#else + dberr_t err; + + /* We don't know the FS block size, use the sector size. The FS + will do the magic. 
*/ + err = os_file_punch_hole_posix(fh, 0, UNIV_PAGE_SIZE); + + return(err == DB_SUCCESS); +#endif /* _WIN32 */ +} + /** This function returns information about the specified file @param[in] path pathname of the file @param[out] stat_info information of a file in a directory @@ -5776,12 +6122,7 @@ AIO::reserve_slot( const char* name, void* buf, os_offset_t offset, - ulint len, - ulint* write_size)/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ + ulint len) { #ifdef WIN_ASYNC_IO ut_a((len & 0xFFFFFFFFUL) == len); @@ -5871,8 +6212,6 @@ AIO::reserve_slot( slot->ptr = slot->buf; slot->offset = offset; slot->err = DB_SUCCESS; - slot->write_size = write_size; - slot->is_log = type.is_log(); slot->original_len = static_cast(len); slot->io_already_done = false; slot->buf = static_cast(buf); @@ -6225,6 +6564,7 @@ Requests an asynchronous i/o operation. @param[in,out] m2 message for the AIO handler (can be used to identify a completed AIO operation); ignored if mode is OS_AIO_SYNC + @return DB_SUCCESS or error code */ dberr_t os_aio_func( @@ -6237,12 +6577,7 @@ os_aio_func( ulint n, bool read_only, fil_node_t* m1, - void* m2, - ulint* write_size)/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. 
*/ + void* m2) { #ifdef WIN_ASYNC_IO BOOL ret = TRUE; @@ -6278,7 +6613,7 @@ try_again: Slot* slot; - slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n, write_size); + slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n); if (type.is_read()) { diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 00b7bd14c98..b2722c2a9bf 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -989,41 +989,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED}, - {"compress_trim_sect512", "compression", - "Number of sect-512 TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512}, - - {"compress_trim_sect1024", "compression", - "Number of sect-1024 TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024}, - - {"compress_trim_sect2048", "compression", - "Number of sect-2048 TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048}, - - {"compress_trim_sect4096", "compression", - "Number of sect-4K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096}, - - {"compress_trim_sect8192", "compression", - "Number of sect-8K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192}, - - {"compress_trim_sect16384", "compression", - "Number of sect-16K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384}, - - {"compress_trim_sect32768", "compression", - "Number of sect-32K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768}, - {"compress_pages_page_compressed", "compression", "Number of pages compressed by page compression", MONITOR_NONE, @@ 
-1034,11 +999,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP}, - {"compress_page_compressed_trim_op_saved", "compression", - "Number of TRIM operation saved by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED}, - {"compress_pages_page_decompressed", "compression", "Number of pages decompressed by page compression", MONITOR_NONE, @@ -2073,36 +2033,12 @@ srv_mon_process_existing_counter( case MONITOR_OVLD_PAGE_COMPRESS_SAVED: value = srv_stats.page_compression_saved; break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512: - value = srv_stats.page_compression_trim_sect512; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024: - value = srv_stats.page_compression_trim_sect1024; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048: - value = srv_stats.page_compression_trim_sect2048; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096: - value = srv_stats.page_compression_trim_sect4096; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192: - value = srv_stats.page_compression_trim_sect8192; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384: - value = srv_stats.page_compression_trim_sect16384; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768: - value = srv_stats.page_compression_trim_sect32768; - break; case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: value = srv_stats.pages_page_compressed; break; case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: value = srv_stats.page_compressed_trim_op; break; - case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED: - value = srv_stats.page_compressed_trim_op_saved; - break; case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: value = srv_stats.pages_page_decompressed; break; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 5d478e4529f..bd4dd1c80af 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -184,7 +184,7 @@ my_bool 
srv_use_native_aio = TRUE; my_bool srv_numa_interleave = FALSE; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ -UNIV_INTERN my_bool srv_use_trim = FALSE; +UNIV_INTERN my_bool srv_use_trim; /* If this flag is TRUE, then we disable doublewrite buffer */ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use this algorithm for page compressing the pages */ @@ -1617,13 +1617,10 @@ srv_export_innodb_status(void) export_vars.innodb_available_undo_logs = srv_available_undo_logs; export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; - export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; - export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; export_vars.innodb_index_pages_written = srv_stats.index_pages_written; export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; - export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted; diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index d43735bba2e..f597147d50f 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -761,6 +761,8 @@ ut_strerr( case DB_NO_FK_ON_S_BASE_COL: return("Cannot add foreign key on the base column " "of stored column"); + case DB_IO_NO_PUNCH_HOLE: + return ("File system does not support punch hole (trim) operation."); /* do not add default: in order to produce a warning if new code is added to the enum but not added here */