diff --git a/mysql-test/suite/innodb/r/sys_defragment.result b/mysql-test/suite/innodb/r/sys_defragment.result new file mode 100644 index 00000000000..f279d694e6e --- /dev/null +++ b/mysql-test/suite/innodb/r/sys_defragment.result @@ -0,0 +1,25 @@ +# restart +SET GLOBAL innodb_file_per_table= 0; +Warnings: +Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release +SET GLOBAL innodb_limit_optimistic_insert_debug = 2; +CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL, f3 INT NOT NULL)ENGINE=InnoDB; +INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; +SET GLOBAL innodb_file_per_table= default; +Warnings: +Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release +CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,f2 VARCHAR(40))ENGINE=InnoDB PARTITION BY KEY() PARTITIONS 256; +INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; +DROP TABLE t2, t1; +InnoDB 0 transactions not purged +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 205520896 +set GLOBAL innodb_fast_shutdown= 0; +# restart +FOUND 1 /InnoDB: Moving the data from extents 4096 through 22016/ in mysqld.1.err +FOUND 1 /InnoDB: Defragmentation of system tablespace is successful/ in mysqld.1.err +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 14680064 +# restart diff --git a/mysql-test/suite/innodb/r/sys_defragment_fail.result b/mysql-test/suite/innodb/r/sys_defragment_fail.result new file mode 100644 index 00000000000..5abf4e65772 --- /dev/null +++ b/mysql-test/suite/innodb/r/sys_defragment_fail.result @@ -0,0 +1,52 @@ +call mtr.add_suppression("InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed: Data structure corruption"); +call mtr.add_suppression("InnoDB: Defragmentation of CLUST_IND in SYS_COLUMNS failed: Data structure corruption"); +call mtr.add_suppression("InnoDB: Cannot free the 
unused segments in system tablespace"); +# restart +set GLOBAL innodb_file_per_table = 0; +Warnings: +Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release +set GLOBAL innodb_limit_optimistic_insert_debug = 2; +CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL)ENGINE=InnoDB; +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096; +SET GLOBAL innodb_file_per_table= 1; +Warnings: +Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release +CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY, +f2 VARCHAR(40))ENGINE=InnoDB PARTITION BY KEY() PARTITIONS 256; +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096; +DROP TABLE t2; +InnoDB 0 transactions not purged +# restart +FOUND 1 /InnoDB: User table exists in the system tablespace/ in mysqld.1.err +DROP TABLE t1; +InnoDB 0 transactions not purged +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 58720256 +# restart: --debug_dbug=+d,fail_after_level_defragment +FOUND 1 /InnoDB: Defragmentation of CLUST_IND in SYS_COLUMNS failed./ in mysqld.1.err +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 58720256 +# restart: --debug_dbug=d,allocation_prepare_fail +FOUND 1 /InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed./ in mysqld.1.err +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 58720256 +# restart: --debug_dbug=d,relation_page_prepare_fail +FOUND 2 /InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed./ in mysqld.1.err +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 58720256 +# restart: --debug_dbug=d,remover_prepare_fail +FOUND 3 /InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed./ in mysqld.1.err +select name, file_size from 
information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 58720256 +# restart +FOUND 5 /InnoDB: Moving the data from extents 4096 through 8960/ in mysqld.1.err +FOUND 1 /InnoDB: Defragmentation of system tablespace is successful/ in mysqld.1.err +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; +name file_size +innodb_system 15728640 +# restart diff --git a/mysql-test/suite/innodb/r/sys_truncate.result b/mysql-test/suite/innodb/r/sys_truncate.result index bb8eafbe584..909f363096e 100644 --- a/mysql-test/suite/innodb/r/sys_truncate.result +++ b/mysql-test/suite/innodb/r/sys_truncate.result @@ -4,7 +4,7 @@ Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a fu SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0; CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL, f3 INT NOT NULL, INDEX(f1), -INDEX(f2), INDEX(f3))ENGINE=InnoDB; +INDEX(f2), INDEX(f3))STATS_PERSISTENT=0 ENGINE=InnoDB; BEGIN; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; diff --git a/mysql-test/suite/innodb/r/sys_truncate_debug.result b/mysql-test/suite/innodb/r/sys_truncate_debug.result index b198d449a76..f51693d849c 100644 --- a/mysql-test/suite/innodb/r/sys_truncate_debug.result +++ b/mysql-test/suite/innodb/r/sys_truncate_debug.result @@ -9,7 +9,7 @@ Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a fu SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0; CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL, f3 INT NOT NULL, INDEX(f1), -INDEX(f2), INDEX(f3))ENGINE=InnoDB; +INDEX(f2), INDEX(f3))STATS_PERSISTENT=0 ENGINE=InnoDB; BEGIN; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; diff --git a/mysql-test/suite/innodb/t/sys_defragment.opt b/mysql-test/suite/innodb/t/sys_defragment.opt new file mode 100644 index 00000000000..beba09e7f48 --- /dev/null +++ 
b/mysql-test/suite/innodb/t/sys_defragment.opt @@ -0,0 +1,6 @@ +--innodb_page_size=4k +--innodb_data_file_path=ibdata1:1M:autoextend:autoshrink +--innodb_undo_tablespaces=0 +--innodb_stats_persistent=0 +--skip_partition=0 +--innodb_sys_tablespaces diff --git a/mysql-test/suite/innodb/t/sys_defragment.test b/mysql-test/suite/innodb/t/sys_defragment.test new file mode 100644 index 00000000000..93b219458a6 --- /dev/null +++ b/mysql-test/suite/innodb/t/sys_defragment.test @@ -0,0 +1,40 @@ +--source include/have_innodb.inc +--source include/have_sequence.inc +--source include/have_debug.inc + +--let MYSQLD_DATADIR= `SELECT @@datadir` +--source include/shutdown_mysqld.inc +--copy_file $MYSQLD_DATADIR/ibdata1 $MYSQLD_DATADIR/ibdata1_copy +--copy_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile0_copy +--source include/start_mysqld.inc + +SET GLOBAL innodb_file_per_table= 0; +SET GLOBAL innodb_limit_optimistic_insert_debug = 2; +CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL, f3 INT NOT NULL)ENGINE=InnoDB; +INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; +SET GLOBAL innodb_file_per_table= default; +CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,f2 VARCHAR(40))ENGINE=InnoDB PARTITION BY KEY() PARTITIONS 256; +INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; +DROP TABLE t2, t1; +--source include/wait_all_purged.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +set GLOBAL innodb_fast_shutdown= 0; +--source include/restart_mysqld.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN=InnoDB: Moving the data from extents 4096 through 22016; +--source include/search_pattern_in_file.inc + +let SEARCH_PATTERN=InnoDB: Defragmentation of system tablespace is successful; +--source include/search_pattern_in_file.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +--source include/shutdown_mysqld.inc + +--move_file $MYSQLD_DATADIR/ibdata1_copy 
$MYSQLD_DATADIR/ibdata1 +--move_file $MYSQLD_DATADIR/ib_logfile0_copy $MYSQLD_DATADIR/ib_logfile0 + +--source include/start_mysqld.inc diff --git a/mysql-test/suite/innodb/t/sys_defragment_fail.opt b/mysql-test/suite/innodb/t/sys_defragment_fail.opt new file mode 100644 index 00000000000..6326b7e90a3 --- /dev/null +++ b/mysql-test/suite/innodb/t/sys_defragment_fail.opt @@ -0,0 +1,6 @@ +--innodb_page_size=4k +--innodb_data_file_path=ibdata1:15M:autoextend:autoshrink +--innodb_undo_tablespaces=0 +--innodb_stats_persistent=0 +--skip_partition=0 +--innodb_sys_tablespaces diff --git a/mysql-test/suite/innodb/t/sys_defragment_fail.test b/mysql-test/suite/innodb/t/sys_defragment_fail.test new file mode 100644 index 00000000000..6136d43d601 --- /dev/null +++ b/mysql-test/suite/innodb/t/sys_defragment_fail.test @@ -0,0 +1,90 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_sequence.inc + +call mtr.add_suppression("InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed: Data structure corruption"); +call mtr.add_suppression("InnoDB: Defragmentation of CLUST_IND in SYS_COLUMNS failed: Data structure corruption"); +call mtr.add_suppression("InnoDB: Cannot free the unused segments in system tablespace"); + +--let MYSQLD_DATADIR= `SELECT @@datadir` +--source include/shutdown_mysqld.inc +--copy_file $MYSQLD_DATADIR/ibdata1 $MYSQLD_DATADIR/ibdata1_copy +--copy_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile0_copy +--source include/start_mysqld.inc + +set GLOBAL innodb_file_per_table = 0; +set GLOBAL innodb_limit_optimistic_insert_debug = 2; +CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL)ENGINE=InnoDB; +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096; + +SET GLOBAL innodb_file_per_table= 1; +CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY, + f2 VARCHAR(40))ENGINE=InnoDB PARTITION BY KEY() PARTITIONS 256; + +INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096; +DROP TABLE t2; + +--source include/wait_all_purged.inc +let 
$restart_parameters=; +--source include/restart_mysqld.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN=InnoDB: User table exists in the system tablespace; +--source include/search_pattern_in_file.inc +DROP TABLE t1; + +--source include/wait_all_purged.inc +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +let $restart_parameters=--debug_dbug=+d,fail_after_level_defragment; +--source include/restart_mysqld.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN=InnoDB: Defragmentation of CLUST_IND in SYS_COLUMNS failed.; +--source include/search_pattern_in_file.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +let $restart_parameters=--debug_dbug=d,allocation_prepare_fail; +--source include/restart_mysqld.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN=InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed.; +--source include/search_pattern_in_file.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +let $restart_parameters=--debug_dbug=d,relation_page_prepare_fail; +--source include/restart_mysqld.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN=InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed.; +--source include/search_pattern_in_file.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +let $restart_parameters=--debug_dbug=d,remover_prepare_fail; +--source include/restart_mysqld.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN=InnoDB: Defragmentation of CLUST_IND in SYS_INDEXES failed.; +--source include/search_pattern_in_file.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +let $restart_parameters=; +--source include/restart_mysqld.inc + +let SEARCH_FILE= 
$MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN= InnoDB: Moving the data from extents 4096 through 8960; +--source include/search_pattern_in_file.inc + +let SEARCH_PATTERN=InnoDB: Defragmentation of system tablespace is successful; +--source include/search_pattern_in_file.inc + +select name, file_size from information_schema.innodb_sys_tablespaces where space = 0; + +--source include/shutdown_mysqld.inc +--move_file $MYSQLD_DATADIR/ibdata1_copy $MYSQLD_DATADIR/ibdata1 +--move_file $MYSQLD_DATADIR/ib_logfile0_copy $MYSQLD_DATADIR/ib_logfile0 +--source include/start_mysqld.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate.opt b/mysql-test/suite/innodb/t/sys_truncate.opt index f940dadffd3..375d9847b65 100644 --- a/mysql-test/suite/innodb/t/sys_truncate.opt +++ b/mysql-test/suite/innodb/t/sys_truncate.opt @@ -1,2 +1,3 @@ --innodb_data_file_path=ibdata1:10M:autoextend:autoshrink --innodb_sys_tablespaces +--innodb_buffer_pool_size=75M diff --git a/mysql-test/suite/innodb/t/sys_truncate.test b/mysql-test/suite/innodb/t/sys_truncate.test index d5e05dea34c..fbba010dc09 100644 --- a/mysql-test/suite/innodb/t/sys_truncate.test +++ b/mysql-test/suite/innodb/t/sys_truncate.test @@ -1,10 +1,11 @@ --source include/have_innodb.inc --source include/have_sequence.inc + SET GLOBAL INNODB_FILE_PER_TABLE= 0; SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0; CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL, f3 INT NOT NULL, INDEX(f1), - INDEX(f2), INDEX(f3))ENGINE=InnoDB; + INDEX(f2), INDEX(f3))STATS_PERSISTENT=0 ENGINE=InnoDB; BEGIN; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; diff --git a/mysql-test/suite/innodb/t/sys_truncate_debug.opt b/mysql-test/suite/innodb/t/sys_truncate_debug.opt index b8a0ed244e4..97647d9843b 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_debug.opt +++ b/mysql-test/suite/innodb/t/sys_truncate_debug.opt @@ -1,3 +1,4 @@ --innodb_data_file_path=ibdata1:1M:autoextend:autoshrink 
--innodb_sys_tablespaces --innodb_page_size=4k +--innodb_buffer_pool_size=100M diff --git a/mysql-test/suite/innodb/t/sys_truncate_debug.test b/mysql-test/suite/innodb/t/sys_truncate_debug.test index be70ea743e9..7dcb5ffde2a 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_debug.test +++ b/mysql-test/suite/innodb/t/sys_truncate_debug.test @@ -16,7 +16,7 @@ SET GLOBAL INNODB_FILE_PER_TABLE= 0; SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0; CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL, f3 INT NOT NULL, INDEX(f1), - INDEX(f2), INDEX(f3))ENGINE=InnoDB; + INDEX(f2), INDEX(f3))STATS_PERSISTENT=0 ENGINE=InnoDB; BEGIN; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index cc55ddd66cc..63d68930e0e 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -42,6 +42,7 @@ Created 11/29/1995 Heikki Tuuri #ifndef DBUG_OFF # include "trx0purge.h" #endif +#include #include #include "trx0undo.h" @@ -237,15 +238,13 @@ inline void xdes_set_state(const buf_block_t &block, xdes_t *descr, Gets the state of an xdes. 
@return state */ UNIV_INLINE -ulint +uint32_t xdes_get_state( /*===========*/ const xdes_t* descr) /*!< in: descriptor */ { - ulint state; - ut_ad(descr); - state = mach_read_from_4(descr + XDES_STATE); + uint32_t state = mach_read_from_4(descr + XDES_STATE); ut_ad(state - 1 < XDES_FSEG); return(state); } @@ -1057,7 +1056,7 @@ fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, @param[in,out] mtr mini-transaction @return block, initialized */ static buf_block_t* fsp_page_create(fil_space_t *space, uint32_t offset, - mtr_t *mtr) + mtr_t *mtr) noexcept { buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex), *block= buf_page_create(space, offset, space->zip_size(), mtr, free_block); @@ -3089,14 +3088,13 @@ std::ostream &fseg_header::to_stream(std::ostream &out) const } #endif /* UNIV_DEBUG */ -/** Get the latched extent descriptor page or -acquire the extent descriptor page. +/** Get the latched page or acquire the page. @param page_id page identifier to be acquired @param mtr mini-transaction @param err error code @return block descriptor */ static -buf_block_t *fsp_get_latched_xdes_page( +buf_block_t *fsp_get_latched_page( page_id_t page_id, mtr_t *mtr, dberr_t *err) { buf_block_t *block= nullptr; @@ -3118,7 +3116,7 @@ class fsp_xdes_old_page const uint32_t m_space; public: fsp_xdes_old_page(uint32_t space):m_space(space) {} - ulint n_pages() + uint32_t n_pages() noexcept { uint32_t count=0; for (uint32_t i= 0; i < m_old_xdes_pages.size(); i++) @@ -3127,7 +3125,7 @@ public: } __attribute__((warn_unused_result)) - dberr_t insert(uint32_t page_no, mtr_t *mtr) + dberr_t insert(uint32_t page_no, mtr_t *mtr) noexcept { uint32_t m_index= page_no >> srv_page_size_shift; if (m_old_xdes_pages.size() > m_index && @@ -3137,7 +3135,7 @@ public: DBUG_EXECUTE_IF("shrink_buffer_pool_full", return DB_OUT_OF_MEMORY;); dberr_t err= DB_SUCCESS; - buf_block_t *block= fsp_get_latched_xdes_page( + buf_block_t *block= fsp_get_latched_page( 
page_id_t(m_space, page_no), mtr, &err); if (block) { @@ -3154,7 +3152,7 @@ public: return err; } - buf_block_t *search(uint32_t page_no) + buf_block_t *search(uint32_t page_no) noexcept { uint32_t m_index= page_no >> srv_page_size_shift; if (m_index > m_old_xdes_pages.size()) @@ -3162,7 +3160,7 @@ public: return m_old_xdes_pages[m_index]; } - void restore(mtr_t *mtr) + void restore(mtr_t *mtr) noexcept { for (uint32_t i= 0; i < m_old_xdes_pages.size(); i++) { @@ -3198,11 +3196,11 @@ static dberr_t fsp_lst_update_skip( buf_block_t *header, uint16_t hdr_offset, fil_addr_t cur_addr, fil_addr_t last_valid_addr, - uint32_t skip_len, mtr_t *mtr) + uint32_t skip_len, mtr_t *mtr) noexcept { dberr_t err= DB_SUCCESS; uint32_t space_id= header->page.id().space(); - buf_block_t *cur= fsp_get_latched_xdes_page( + buf_block_t *cur= fsp_get_latched_page( page_id_t(space_id, cur_addr.page), mtr, &err); if (!cur) return err; @@ -3229,7 +3227,7 @@ dberr_t fsp_lst_update_skip( prev= cur; else { - prev= fsp_get_latched_xdes_page( + prev= fsp_get_latched_page( page_id_t(space_id, last_valid_addr.page), mtr, &err); if (!prev) return err; @@ -3270,7 +3268,7 @@ dberr_t fsp_lst_write_end( buf_block_t *header, uint16_t hdr_offset, fil_addr_t cur_addr, uint32_t skip_len, uint32_t orig_len, - mtr_t *mtr) + mtr_t *mtr) noexcept { dberr_t err= DB_SUCCESS; byte *len_bytes= &header->page.frame[hdr_offset + FLST_LEN]; @@ -3312,7 +3310,7 @@ func_exit: header->page.frame + hdr_offset + FLST_LAST, cur_addr.page, cur_addr.boffset, mtr); - buf_block_t *cur_block= fsp_get_latched_xdes_page( + buf_block_t *cur_block= fsp_get_latched_page( page_id_t(header->page.id().space(), cur_addr.page), mtr, &err); @@ -3340,7 +3338,7 @@ func_exit: __attribute__((warn_unused_result)) static dberr_t fsp_shrink_list(buf_block_t *header, uint16_t hdr_offset, - uint32_t threshold, mtr_t *mtr) + uint32_t threshold, mtr_t *mtr) noexcept { ut_ad(mach_read_from_4(header->page.frame + FIL_PAGE_OFFSET) == 0); const uint32_t 
len= flst_get_len(hdr_offset + header->page.frame); @@ -3362,7 +3360,7 @@ dberr_t fsp_shrink_list(buf_block_t *header, uint16_t hdr_offset, ut_ad(!(addr.page & (srv_page_size - 1))); if (!descr_block || descr_block->page.id().page_no() != addr.page) { - descr_block= fsp_get_latched_xdes_page( + descr_block= fsp_get_latched_page( page_id_t(header->page.id().space(), addr.page), mtr, &err); if (!descr_block) return err; @@ -3426,7 +3424,7 @@ dberr_t fsp_xdes_reset(uint32_t space_id, uint32_t threshold, mtr_t *mtr) 0, (cur_descr_page + srv_page_size - 1)); last_descr_offset+= XDES_SIZE; dberr_t err= DB_SUCCESS; - buf_block_t *block= fsp_get_latched_xdes_page( + buf_block_t *block= fsp_get_latched_page( page_id_t(space_id, cur_descr_page), mtr, &err); if (!block) return err; @@ -3478,7 +3476,7 @@ dberr_t fsp_traverse_extents( { if (!block) { - block= fsp_get_latched_xdes_page( + block= fsp_get_latched_page( page_id_t(space->id, last_descr_page_no), mtr, &err); if (!block) return err; @@ -3546,32 +3544,31 @@ dberr_t fsp_traverse_extents( return err; } -#ifdef UNIV_DEBUG /** Validate the system tablespace list */ __attribute__((warn_unused_result)) -dberr_t fsp_tablespace_validate(fil_space_t *space) +static dberr_t fsp_tablespace_validate(fil_space_t *space, + mtr_t *mtr) noexcept { /* Validate all FSP list in system tablespace */ - mtr_t local_mtr; dberr_t err= DB_SUCCESS; - local_mtr.start(); - if (buf_block_t *header= fsp_get_header( - space, &local_mtr, &err)) + if (buf_block_t *header= fsp_get_header(space, mtr, &err)) { - flst_validate(header, FSP_FREE + FSP_HEADER_OFFSET, &local_mtr); - flst_validate(header, FSP_FREE_FRAG + FSP_HEADER_OFFSET, - &local_mtr); - flst_validate(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, - &local_mtr); - flst_validate(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, - &local_mtr); - flst_validate(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, - &local_mtr); + err= flst_validate(header, FSP_FREE + FSP_HEADER_OFFSET, mtr); + if (err == 
DB_SUCCESS) + err= flst_validate(header, FSP_FREE_FRAG + FSP_HEADER_OFFSET, + mtr); + if (err == DB_SUCCESS) + err= flst_validate(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, + mtr); + if (err == DB_SUCCESS) + err= flst_validate(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, + mtr); + if (err == DB_SUCCESS) + err= flst_validate(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, + mtr); } - local_mtr.commit(); return err; } -#endif /* UNIV_DEBUG */ /** Store the inode information which basically stores the page and offset */ @@ -4017,6 +4014,1748 @@ dberr_t fil_space_t::garbage_collect(bool shutdown) return unused_inodes.free_segs(); } +class SpaceDefragmenter; + +namespace flst +{ + /** Validate the file list node for the system tablespace. + @param addr file space address + @return true if validation successful or false */ + static bool node_valid(const fil_addr_t *addr) noexcept + { + return addr->boffset >= FIL_PAGE_DATA && + addr->boffset < (srv_page_size - FIL_PAGE_DATA_END); + } + + /** Prepare the steps for removing the file list node + @param descr_block descriptor block + @param xoffset descriptor offset within the block + @param free_limit maximum free limit in the tablespace + @param mtr mini-transaction + @param prev_block previous block in the list + @param next_block next block in the list + @return error code */ + static dberr_t remove_prepare(const buf_block_t &descr_block, + uint32_t xoffset, uint32_t free_limit, + mtr_t *mtr, buf_block_t **prev_block, + buf_block_t **next_block) noexcept + { + const xdes_t *descr= descr_block.page.frame + xoffset; + fil_addr_t prev_addr= flst_get_prev_addr(descr); + fil_addr_t next_addr= flst_get_next_addr(descr); + dberr_t err= DB_SUCCESS; + + if (prev_addr.page != FIL_NULL) + { + if (!node_valid(&prev_addr)) + return DB_CORRUPTION; + + *prev_block= fsp_get_latched_page(page_id_t{0, prev_addr.page}, + mtr, &err); + ut_ad(!*prev_block == (err != DB_SUCCESS)); + + if (!*prev_block) + return err; + + fil_addr_t 
cur_addr= + flst_get_next_addr((*prev_block)->page.frame + + prev_addr.boffset); + if (cur_addr.page != descr_block.page.id().page_no() || + cur_addr.boffset != xoffset) + return DB_CORRUPTION; + } + + if (next_addr.page != FIL_NULL) + { + if (!node_valid(&next_addr)) + return DB_CORRUPTION; + + *next_block= fsp_get_latched_page(page_id_t{0, next_addr.page}, + mtr, &err); + ut_ad(!*next_block == (err != DB_SUCCESS)); + if (!*next_block) + return err; + + fil_addr_t cur_addr= + flst_get_prev_addr((*next_block)->page.frame + next_addr.boffset); + if (cur_addr.page != descr_block.page.id().page_no() || + cur_addr.boffset != xoffset) + return DB_CORRUPTION; + } + + return err; + } + + /** Complete the steps for removing the file list node + @param base base block where free list starts + @param boffset offset where list starts + @param descr descriptor to be removed + @param mtr mini-transaction */ + static void remove_complete(buf_block_t *base, uint16_t boffset, + xdes_t *descr, mtr_t *mtr) noexcept + { + fil_addr_t prev_addr= flst_get_prev_addr(descr + XDES_FLST_NODE); + fil_addr_t next_addr= flst_get_next_addr(descr + XDES_FLST_NODE); + /* remove_prepare() checked these already */ + ut_ad(next_addr.page == FIL_NULL || node_valid(&next_addr)); + ut_ad(prev_addr.page == FIL_NULL || node_valid(&prev_addr)); + byte *list= base->page.frame + boffset; + + buf_block_t *prev_block= nullptr; + buf_block_t *next_block= nullptr; + + if (prev_addr.page != FIL_NULL) + { + prev_block= + mtr->get_already_latched(page_id_t{0, prev_addr.page}, + MTR_MEMO_PAGE_SX_FIX); + ut_ad(prev_block); + + flst_write_addr(*prev_block, prev_block->page.frame + + prev_addr.boffset + FLST_NEXT, + next_addr.page, next_addr.boffset, mtr); + } + else + flst_write_addr(*base, list + FLST_FIRST, + next_addr.page, next_addr.boffset, mtr); + + if (next_addr.page != FIL_NULL) + { + next_block= + mtr->get_already_latched(page_id_t{0, next_addr.page}, + MTR_MEMO_PAGE_SX_FIX); + ut_ad(next_block); + + 
flst_write_addr(*next_block, next_block->page.frame + + next_addr.boffset + FLST_PREV, + prev_addr.page, prev_addr.boffset, mtr); + } + else + flst_write_addr(*base, list + FLST_LAST, + prev_addr.page, prev_addr.boffset, mtr); + + /* All callers of remove_prepare() check the FLST_LEN of + the list */ + byte *len= list + FLST_LEN; + mtr->write<4>(*base, len, mach_read_from_4(len) - 1); + } + + /** Prepare the steps for appending the block to the end of the list + @param base block where list starts + @param boffset offset to find the list + @param free_limit maximum free limit in the tablespace + @param mtr mini-transaction + @param last_block_list last block in the list + @return error code */ + static dberr_t append_prepare(const buf_block_t &base, uint16_t boffset, + uint32_t free_limit, mtr_t *mtr, + buf_block_t **last_block_list) noexcept + { + ut_ad(!*last_block_list); + if (!flst_get_len(base.page.frame + boffset)) + return DB_SUCCESS; + + fil_addr_t addr= flst_get_last(base.page.frame + boffset); + + if (addr.page >= free_limit) + return DB_CORRUPTION; + + if (!node_valid(&addr)) + return DB_CORRUPTION; + + dberr_t err= DB_SUCCESS; + *last_block_list= fsp_get_latched_page(page_id_t{0, addr.page}, + mtr, &err); + return err; + } + + /** Complete the steps for appending the block to the end of the list + @param base base block where free list starts + @param boffset offset where list starts + @param curr extent descriptor block + @param coffset offset to point the descriptor + @param mtr mini-transaction */ + static void append_complete(buf_block_t *base, uint16_t boffset, + buf_block_t *curr, uint16_t coffset, + mtr_t *mtr) noexcept + { + fil_addr_t last_addr= flst_get_last(base->page.frame + boffset); + ut_ad(last_addr.page == FIL_NULL || node_valid(&last_addr)); + buf_block_t *last_block_list= nullptr; + if (last_addr.page != FIL_NULL) + { + last_block_list= + mtr->get_already_latched(page_id_t{0, last_addr.page}, + MTR_MEMO_PAGE_SX_FIX); + 
ut_ad(last_block_list); + + fil_addr_t addr= flst_get_last(base->page.frame + boffset); + + flst_write_addr(*last_block_list, + last_block_list->page.frame + addr.boffset + + FLST_NEXT, + curr->page.id().page_no(), coffset, mtr); + flst_write_addr(*curr, + curr->page.frame + coffset + FLST_PREV, + addr.page, addr.boffset, mtr); + flst_write_addr(*base, base->page.frame + boffset + FLST_LAST, + curr->page.id().page_no(), coffset, mtr); + } + else + { + /* Encountered empty list. So add current block as FIRST + and LAST block in the list */ + flst_write_addr(*curr, + curr->page.frame + coffset + FLST_PREV, + FIL_NULL, 0, mtr); + flst_write_addr(*base, base->page.frame + boffset + FLST_FIRST, + curr->page.id().page_no(), coffset, mtr); + memcpy(base->page.frame + boffset + FLST_LAST, + base->page.frame + boffset + FLST_FIRST, FIL_ADDR_SIZE); + mtr->memmove(*base, boffset + FLST_LAST, + boffset + FLST_FIRST, FIL_ADDR_SIZE); + } + + flst_write_addr(*curr, + curr->page.frame + coffset + FLST_NEXT, + FIL_NULL, 0, mtr); + + byte *len= base->page.frame + boffset + FLST_LEN; + mtr->write<4>(*base, len, mach_read_from_4(len) + 1); + } +} /* namespace flst */ + +static dberr_t fseg_validate_low(fil_space_t *space, dict_index_t *index, + mtr_t *mtr) noexcept +{ + dberr_t err= DB_SUCCESS; + buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err); + if (UNIV_UNLIKELY(!root)) + return err; + + fseg_header_t *seg_header= + root->page.frame + PAGE_HEADER + PAGE_BTR_SEG_TOP; + buf_block_t *iblock; + fseg_inode_t *inode= fseg_inode_try_get(seg_header, 0, 0, mtr, + &iblock, &err); + if (!inode) + return err; + + uint16_t i_offset= uint16_t(inode - iblock->page.frame); + + err= flst_validate(iblock, uint16_t(i_offset + FSEG_FREE), mtr); + if (err == DB_SUCCESS) + err= flst_validate(iblock, uint16_t(i_offset + FSEG_NOT_FULL), mtr); + if (err == DB_SUCCESS) + err= flst_validate(iblock, uint16_t(i_offset + FSEG_FULL), mtr); + + if (err) return err; + + seg_header= 
root->page.frame + PAGE_HEADER + PAGE_BTR_SEG_LEAF; + inode= fseg_inode_try_get(seg_header, 0, 0, mtr, &iblock, &err); + if (!inode) + return err; + + i_offset= uint16_t(inode - iblock->page.frame); + + err= flst_validate(iblock, uint16_t(i_offset + FSEG_FREE), mtr); + if (err == DB_SUCCESS) + err= flst_validate(iblock, uint16_t(i_offset + FSEG_NOT_FULL), mtr); + if (err == DB_SUCCESS) + err= flst_validate(iblock, uint16_t(i_offset + FSEG_FULL), mtr); + return err; +} + +/** Validate the file segment lists (FSEG_FREE, FSEG_NOT_FULL, FSEG_FULL) of an index */ +__attribute__((warn_unused_result)) +static dberr_t fseg_validate(fil_space_t *space, + dict_index_t *index) noexcept +{ + /* Validate the segment lists within a mini-transaction of its own */ + mtr_t mtr; + mtr.start(); + dberr_t err= fseg_validate_low(space, index, &mtr); + mtr.commit(); + return err; +} + +/** Prepare the associated pages of the current block and modify +the associated pages */ +class AssociatedPages final +{ + buf_block_t *m_left_block= nullptr; + buf_block_t *m_right_block= nullptr; + buf_block_t *m_parent_block= nullptr; + buf_block_t *const m_cur_block; + mtr_t *const m_mtr; + +public: + AssociatedPages(buf_block_t *cur_block, mtr_t *mtr) + : m_cur_block(cur_block), m_mtr(mtr) {} + + /** Fetch the left, right and parent page for the respective + current block and make sure that no issue exists */ + dberr_t prepare(uint32_t parent_page) noexcept + { + uint32_t left_page_no= btr_page_get_prev(m_cur_block->page.frame); + dberr_t err= DB_SUCCESS; + if (left_page_no != FIL_NULL) + { + m_left_block= fsp_get_latched_page(page_id_t{0, left_page_no}, + m_mtr, &err); + ut_ad(!m_left_block == (err != DB_SUCCESS)); + if (!m_left_block) + return err; + } + + uint32_t right_page_no= btr_page_get_next(m_cur_block->page.frame); + if (right_page_no != FIL_NULL) + { + m_right_block= fsp_get_latched_page(page_id_t{0, right_page_no}, + m_mtr, &err); + ut_ad(!m_right_block == (err != DB_SUCCESS)); + if (!m_right_block) + return err; + } + + m_parent_block= 
fsp_get_latched_page(page_id_t{0, parent_page}, + m_mtr, &err); + return err; + } + + /** Modify the FIL_PAGE_NEXT, FIL_PAGE_PREV, CHILD_PAGE of + respective left, right and parent block to new page number */ + void complete(uint32_t new_page_no, uint32_t parent_offset) noexcept + { + if (m_left_block) + m_mtr->write<4>(*m_left_block, + m_left_block->page.frame + FIL_PAGE_NEXT, + new_page_no); + + if (m_right_block) + m_mtr->write<4>(*m_right_block, + m_right_block->page.frame + FIL_PAGE_PREV, + new_page_no); + + m_mtr->write<4>(*m_parent_block, + m_parent_block->page.frame + parent_offset, + new_page_no); + } +}; + +/** page operation for the system tablespace does the 2 things: +1) Page Allocation +2) Page removal + +Steps for page allocation depends on new extent state. + +(1) If the xdes_get_state(new_descr) == XDES_FREE then +remove the new extent from FSP_FREE list + + (1.1) If the page has to be allocated for segment then + add the newly allocated extent descriptor to + FSEG_NOT_FULL list and make the xdes_set_state(new_descr) + as XDES_FSEG + + (1.2) If the page has to be non-segment page then add the + newly allocated extent descriptor to FSP_FREE_FRAG list + and make the xdes_set_state(new_descr) as XDES_FREE_FRAG + + (1.3) Allocate a page from the new extent + +(2) If the xdes_get_state(new_descr) == XDES_FREE_FRAG then + + (2.1) Allocate a page from the new extent + + (2.2) xdes_get_n_used(new_descr) is FSP_EXTENT_SIZE then + - Remove the new extent descriptor from FSP_FREE_FRAG list + - Add the new extent descriptor to FSP_FULL_FRAG list + and make xdes_set_state(new_descr) as XDES_FULL_FRAG + +(3) If the xdes_get_state(new_descr) == XDES_FSEG then + + (3.1) Allocate a page from extent + + (3.2) xdes_get_n_used(new_descr) is FSP_EXTENT_SIZE then + - Remove the new extent descriptor from FSEG_NOT_FULL list + - Add the new extent descriptor to FSEG_FULL list + + +Steps for removing the page from extent: + + (1) To remove the page from extent and number 
of used + pages in extent descriptor is FSP_EXTENT_SIZE + + (1a) If the xdes_get_state(m_old_descr) is XDES_FSEG then + move the extent descriptor from FSEG_FULL to FSEG_NOT_FULL + + (1b) If the xdes_get_stats(m_old_descr) is XDES_FREE_FRAG/XDES_FULL_FRAG + then move the extent descriptor from FSP_FULL_FRAG to + FSP_FREE_FRAG list + + (2) If the number of used pages in extent descriptor is 0 then + move the extent descriptor to FSP_FREE + + (3) Free the page and mark the XDES_FREE_BIT of the respective + page in current extent descriptor + +Above all scenario done by 2 steps to make sure that there +will be no error scenario once the modification of the pages +has started. +1) prepare - Basically validates the necessary condition +and make sure that pages are being latched +2) Complete - Completes the action by using the latched +pages in prepare step */ +class PageOperator final +{ + /** Header block for the tablespace */ + buf_block_t *const m_header_block= nullptr; + /** Index node block */ + buf_block_t *const m_iblock= nullptr; + /** Index node */ + fseg_inode_t *const m_inode= nullptr; + /** offset of index node within index node page*/ + uint16_t m_ioffset= 0; + /** Maximum free limit of the tablespace */ + uint32_t m_free_limit= 0; + /** Segment id */ + uint64_t m_seg_id= 0; + /** Extent size */ + uint32_t m_extent_size= 0; + + /** New block to be allocated */ + buf_block_t *m_new_block= nullptr; + /** New block extent descriptor */ + buf_block_t *m_new_xdes= nullptr; + /** New block descriptor */ + xdes_t *m_new_descr= nullptr; + /** New block descriptor offset within xdes page */ + uint16_t m_xoffset= 0; + /** New extent descriptor state */ + uint32_t m_new_state= 0; + /** Need segment allocation */ + bool m_need_segment= false; + /** Old pages during allocation to be saved */ + buf_block_t *m_old_pages[8]= {nullptr}; + /** Page to be removed */ + byte m_old_page_no[4]= {0}; + /** Old block extent descriptor page */ + buf_block_t *m_old_xdes= nullptr; + 
/** Old block descriptor */ + xdes_t *m_old_descr= nullptr; + /** Old block descriptor offset with descriptor page */ + uint16_t m_old_xoffset= 0; + /** Old descriptor state */ + uint32_t m_old_state= 0; + /** Mini-transaction to allocate & free a page */ + mtr_t *const m_mtr; + + /** Save the old page state of the block before + allocating a page + @param block block to be stored + @return error code */ + dberr_t save_old_page(buf_block_t *block) noexcept + { + if (!block) return DB_SUCCESS; + size_t first_free; + for (first_free= 0; first_free < array_elements(m_old_pages); first_free++) + { + const buf_block_t *b= m_old_pages[first_free]; + if (!b) + goto found; + if (b->page.hash == &block->page) + return DB_SUCCESS; + } + return DB_CORRUPTION; +found: + buf_block_t *old= buf_LRU_get_free_block(have_no_mutex_soft); + if (!old) return DB_OUT_OF_MEMORY; + memcpy_aligned( + old->page.frame, block->page.frame, srv_page_size); + m_old_pages[first_free]= old; + old->page.hash= &block->page; + return DB_SUCCESS; + } + + /** Prepare the steps for free extent allocation by validating + FLST_PREV, FLST_NEXT of choosen extent descriptor + and their FLST_LEN of FSP_FREE list in FSP_HEADER_PAGE. 
+ @return error code or DB_SUCCESS */ + dberr_t free_extent_prepare() noexcept + { + /* At least there should be 1 element in FSP_FREE list */ + byte *len= + &m_header_block->page.frame[FSP_HEADER_OFFSET + FSP_FREE + + FLST_LEN]; + if (mach_read_from_4(len) == 0) + return DB_CORRUPTION; + + buf_block_t *fsp_free_prev= nullptr; + buf_block_t *fsp_free_next= nullptr; + + dberr_t err= flst::remove_prepare(*m_new_xdes, m_xoffset, + m_free_limit, m_mtr, + &fsp_free_prev, &fsp_free_next); + if (err == DB_SUCCESS) + { + err= save_old_page(fsp_free_prev); + if (err == DB_SUCCESS) + err= save_old_page(fsp_free_next); + } + return err; + } + + /** Complete the free extent allocation */ + void free_extent_complete() noexcept + { + flst::remove_complete(m_header_block, FSP_HEADER_OFFSET + FSP_FREE, + m_new_descr, m_mtr); + fil_system.sys_space->free_len--; + } + + /** Prepare the steps to do the following + 1) free extent allocation + 2) Add the extent to FSEG_NOT_FULL list by validating the + last extent descriptor in FSEG_NOT_FULL list of segment inode + @return error code */ + dberr_t initialize_segment_prepare() noexcept + { + dberr_t err= free_extent_prepare(); + if (err) return err; + + buf_block_t *fseg_not_full_last= nullptr; + err= flst::append_prepare(*m_iblock, + uint16_t(m_ioffset + FSEG_NOT_FULL), + m_free_limit, m_mtr, &fseg_not_full_last); + if (err == DB_SUCCESS) + err= save_old_page(fseg_not_full_last); + return err; + } + + /** This function does the following + 1) Allocating the free extent + 2) Appending the extent to FSEG_NOT_FULL list in segment inode + 3) Mark the extent state as XDES_FSEG */ + void initialize_segment_complete() noexcept + { + free_extent_complete(); + flst::append_complete(m_iblock, + uint16_t(m_ioffset + FSEG_NOT_FULL), + m_new_xdes, m_xoffset, m_mtr); + + /* Update the FSEG_NOT_FULL_N_USED in inode */ + byte *p_not_full= m_inode + FSEG_NOT_FULL_N_USED; + m_mtr->write<4>(*m_iblock, p_not_full, + mach_read_from_4(p_not_full) + 1); + 
xdes_set_state(*m_new_xdes, m_new_descr, XDES_FSEG, m_mtr); + m_mtr->write<8,mtr_t::MAYBE_NOP>(*m_new_xdes, + m_new_descr + XDES_ID, + m_seg_id); + xdes_set_free(*m_new_xdes, m_new_descr, + m_new_block->page.id().page_no() % m_extent_size, + m_mtr); + } + + /** Prepare the steps for + 1) Allocating the free extent + 2) Adding the extent to FSP_FREE_FRAG list by validating + the last extent descriptor in FSP_FREE_FRAG list of FSP_HEADER page + @return error code */ + dberr_t initialize_free_frag_prepare() noexcept + { + dberr_t err= free_extent_prepare(); + if (err) return err; + + buf_block_t *fsp_free_frag_last= nullptr; + err= flst::append_prepare(*m_header_block, + FSP_HEADER_OFFSET + FSP_FREE_FRAG, + m_free_limit, m_mtr, &fsp_free_frag_last); + + if (err == DB_SUCCESS) + err= save_old_page(fsp_free_frag_last); + return err; + } + + /** This function does the following + 1) Allocating the free extent + 2) Appending the extent to FSP_FREE_FRAG list in FSP_HEADER page + 3) Mark the extent state as XDES_FREE_FRAG */ + void initialize_free_frag_complete() noexcept + { + free_extent_complete(); + flst::append_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FREE_FRAG, + m_new_xdes, m_xoffset, m_mtr); + + byte *n_frag_used= m_header_block->page.frame + + FSP_HEADER_OFFSET + FSP_FRAG_N_USED; + m_mtr->write<4>(*m_header_block, n_frag_used, + mach_read_from_4(n_frag_used) + 1); + + /* Allocate the extent state to FREE_FRAG & update FSP_FRAG_N_USED */ + xdes_set_state(*m_new_xdes, m_new_descr, XDES_FREE_FRAG, m_mtr); + xdes_set_free(*m_new_xdes, m_new_descr, + m_new_block->page.id().page_no() % m_extent_size, + m_mtr); + } + + /** Prepare the steps to + 1) Allocate a page from XDES_FSEG extent + 2) If the extent size is FSP_EXTENT_SIZE then + prepare the extent to move from FSEG_NOT_FULL to FSEG_FULL + list in segment inode by validating the last extent descriptor in + FSEG_FULL list and previous and next extent in FSEG_NOT_FULL list. 
+ @return error code */ + dberr_t alloc_from_fseg_prepare() noexcept + { + uint32_t n_used= xdes_get_n_used(m_new_descr); + if (n_used < 1 || n_used >= m_extent_size) + return DB_CORRUPTION; + + if (n_used < m_extent_size) + return DB_SUCCESS; + + byte *lst= m_iblock->page.frame + uint16_t(m_ioffset + FSEG_NOT_FULL); + if (!mach_read_from_4(lst + FLST_LEN)) + return DB_CORRUPTION; + + buf_block_t *fseg_not_full_prev= nullptr; + buf_block_t *fseg_not_full_next= nullptr; + dberr_t err= flst::remove_prepare(*m_new_xdes, m_xoffset, + m_free_limit, m_mtr, + &fseg_not_full_prev, + &fseg_not_full_next); + if (err) return err; + + buf_block_t *fseg_full_last= nullptr; + err= flst::append_prepare(*m_iblock, + uint16_t(m_ioffset + FSEG_FULL), + m_free_limit, m_mtr, &fseg_full_last); + if (err == DB_SUCCESS) + { + err= save_old_page(fseg_not_full_prev); + if (err == DB_SUCCESS) + err= save_old_page(fseg_not_full_next); + if (err == DB_SUCCESS) + err= save_old_page(fseg_full_last); + } + return err; + } + + /** Does the following + 1) Complete the page allocation from file segment. + 2) If the extent size is FSP_EXTENT_SIZE then + i) Remove the extent from FSEG_NOT_FULL list + ii) Add the extent to FSEG_FULL */ + void alloc_from_fseg_complete() noexcept + { + xdes_set_free(*m_new_xdes, m_new_descr, + m_new_block->page.id().page_no() % m_extent_size, + m_mtr); + + byte *p_not_full= m_inode + FSEG_NOT_FULL_N_USED; + uint32_t n_used_val= mach_read_from_4(p_not_full) + 1; + + if (xdes_get_n_used(m_new_descr) == m_extent_size) + { + n_used_val-= FSP_EXTENT_SIZE; + m_mtr->write<4>(*m_iblock, p_not_full, n_used_val); + flst::remove_complete(m_iblock, + uint16_t(m_ioffset + FSEG_NOT_FULL), + m_new_descr, m_mtr); + flst::append_complete(m_iblock, + uint16_t(m_ioffset + FSEG_FULL), + m_new_xdes, m_xoffset, m_mtr); + } + else + m_mtr->write<4>(*m_iblock, p_not_full, n_used_val); + } + + /** Prepare the steps to + 1) Allocate the page from free fragment extent. 
+ 2) If the extent size is FSP_EXTENT_SIZE then prepare the + steps to move the extent from FSP_FREE_FRAG to FSP_FULL_FRAG + list by validating the next, previous extent descriptor of + current extent descriptor in FSP_FREE_FRAG list and + last extent descriptor in FSP_FULL_FRAG list + @return error code */ + dberr_t alloc_from_free_frag_prepare() noexcept + { + uint32_t n_used= xdes_get_n_used(m_new_descr); + if (n_used < 1 || n_used >= m_extent_size) + return DB_CORRUPTION; + + if (n_used < m_extent_size) + return DB_SUCCESS; + + byte *lst= m_header_block->page.frame + FSP_HEADER_OFFSET + FSP_FREE_FRAG; + if (!mach_read_from_4(lst + FLST_LEN)) + return DB_CORRUPTION; + + buf_block_t *fsp_free_frag_prev= nullptr; + buf_block_t *fsp_free_frag_next= nullptr; + dberr_t err= flst::remove_prepare(*m_new_xdes, m_xoffset, + m_free_limit, m_mtr, + &fsp_free_frag_prev, + &fsp_free_frag_next); + if (err) return err; + + buf_block_t *fsp_full_frag_last= nullptr; + err= flst::append_prepare(*m_header_block, + FSP_HEADER_OFFSET + FSP_FULL_FRAG, + m_free_limit, m_mtr, + &fsp_full_frag_last); + + if (err == DB_SUCCESS) + { + err= save_old_page(fsp_free_frag_prev); + if (err == DB_SUCCESS) + err= save_old_page(fsp_free_frag_next); + if (err == DB_SUCCESS) + err= save_old_page(fsp_full_frag_last); + } + return err; + } + + /** Does the following + 1) Allocate the page from fragment extent + 2) If the extent size is FSP_EXTENT_SIZE then + i) remove the extent descriptor from FSP_FREE_FRAG list + ii) Add the extent descriptor in FSP_FULL_FRAG list */ + void alloc_from_free_frag_complete() noexcept + { + xdes_set_free(*m_new_xdes, m_new_descr, + m_new_block->page.id().page_no() % m_extent_size, + m_mtr); + + byte *frag_n_used= m_header_block->page.frame + FSP_HEADER_OFFSET + + FSP_FRAG_N_USED; + uint32_t n_used_frag= mach_read_from_4(frag_n_used) + 1; + + if (xdes_get_n_used(m_new_descr) == m_extent_size) + { + n_used_frag-= FSP_EXTENT_SIZE; + m_mtr->write<4>(*m_header_block, 
frag_n_used, n_used_frag); + flst::remove_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FREE_FRAG, + m_new_descr, m_mtr); + + flst::append_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FULL_FRAG, + m_new_xdes, m_xoffset, m_mtr); + } + else + m_mtr->write<4>(*m_header_block, frag_n_used, n_used_frag); + } + + /** Prepare the steps to free the page from fragment pages. + 1) Check the page exist in segment fragment array + 2) If the extent descriptor is in XDES_FULL_FRAG then + prepare the steps to move the extent descriptor + from FSP_FULL_FRAG to FSP_FREE_FRAG list by validating + the FLST_PREV, FLST_NEXT of current extent descriptor + and FLST_LAST in FSP_FREE_FRAG list + 3) If the extent is about to empty then prepare the steps + to move the extent descriptor from FSP_FREE_FRAG to FSP_FREE list + by validating the FLST_PREV, FLST_NEXT of current extent + descriptor and FLST_LAST in FSP_FREE list + @return error code */ + dberr_t free_from_frag_prepare() noexcept + { + uint32_t n_arr_slots= m_extent_size / 2; + bool page_exist= false; + for (ulint i= 0; i < n_arr_slots; i++) + { + if (!memcmp(m_inode + FSEG_FRAG_ARR + i * FSEG_FRAG_SLOT_SIZE, + m_old_page_no, 4)) + { + page_exist= true; + break; + } + } + + if (!page_exist) return DB_CORRUPTION; + + buf_block_t *fsp_full_frag_prev= nullptr; + buf_block_t *fsp_full_frag_next= nullptr; + buf_block_t *fsp_free_frag_last= nullptr; + dberr_t err= DB_SUCCESS; + uint32_t n_used= xdes_get_n_used(m_old_descr); + + if (m_old_state == XDES_FULL_FRAG) + { + if (n_used != m_extent_size) + return DB_CORRUPTION; + + byte *lst= m_header_block->page.frame + FSP_HEADER_OFFSET + FSP_FULL_FRAG; + if (!mach_read_from_4(lst + FLST_LEN)) + return DB_CORRUPTION; + + err= flst::remove_prepare(*m_old_xdes, m_old_xoffset, m_free_limit, + m_mtr, &fsp_full_frag_prev, + &fsp_full_frag_next); + + if (err) return err; + + return flst::append_prepare(*m_header_block, + FSP_HEADER_OFFSET + FSP_FREE_FRAG, + m_free_limit, m_mtr, + 
&fsp_free_frag_last); + } + + if (n_used >= m_extent_size || n_used == 0) + return DB_CORRUPTION; + + buf_block_t *fsp_free_frag_prev= nullptr; + buf_block_t *fsp_free_frag_next= nullptr; + buf_block_t *fsp_free_last= nullptr; + + if (n_used == 1) + { + byte *lst= m_header_block->page.frame + FSP_HEADER_OFFSET + FSP_FREE_FRAG; + if (!mach_read_from_4(lst + FLST_LEN)) + return DB_CORRUPTION; + + err= flst::remove_prepare(*m_old_xdes, m_old_xoffset, m_free_limit, + m_mtr, &fsp_free_frag_prev, + &fsp_free_frag_next); + if (err) return err; + + return flst::append_prepare(*m_header_block, + FSP_HEADER_OFFSET + FSP_FREE, + m_free_limit, m_mtr, + &fsp_free_last); + } + return err; + } + + /** Complete the removal of page from XDES_FREE_FRAG + (or) XDES_FULL_FRAG list. + 1) If the extent is from FSP_FULL_FRAG then move the + extent descriptor from FSP_FULL_FRAG to FSP_FREE_FRAG + 2) If the extent is from FSP_FREE_FRAG and no pages + has been used in that descr then move the extent + from FSP_FREE_FRAG to FSP_FREE */ + void free_from_frag_complete() noexcept + { + uint32_t old_page_no= mach_read_from_4(m_old_page_no); + m_mtr->free(*fil_system.sys_space, old_page_no); + xdes_set_free(*m_old_xdes, m_old_descr, + old_page_no % m_extent_size, m_mtr); + uint32_t n_used= xdes_get_n_used(m_old_descr); + byte *frag_n_used= m_header_block->page.frame + FSP_HEADER_OFFSET + + FSP_FRAG_N_USED; + uint32_t n_frag_used= mach_read_from_4(frag_n_used) - 1; + + for (size_t i= 0, frag= m_ioffset + FSEG_FRAG_ARR; + i < m_extent_size / 2; i++, frag += FSEG_FRAG_SLOT_SIZE) + { + if (!memcmp(m_iblock->page.frame + frag, m_old_page_no, 4)) + { + m_mtr->memset(m_iblock, frag, 4, 0xff); + break; + } + } + + if (n_used == m_extent_size - 1) + { + flst::remove_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FULL_FRAG, + m_old_descr, m_mtr); + + xdes_set_state(*m_old_xdes, m_old_descr, XDES_FREE_FRAG, m_mtr); + + flst::append_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FREE_FRAG, + 
m_old_xdes, m_old_xoffset, m_mtr); + + n_frag_used += m_extent_size; + } + else if (n_used == 0) + { + flst::remove_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FREE_FRAG, + m_old_descr, m_mtr); + + xdes_set_state(*m_old_xdes, m_old_descr, XDES_FREE, m_mtr); + + flst::append_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FREE, + m_old_xdes, m_old_xoffset, m_mtr); + } + m_mtr->write<4>(*m_header_block, frag_n_used, n_frag_used); + } + + /** Prepare the removal of page from file segment + 1) If the number of used pages in extent descriptor is + FSP_EXTENT_SIZE then move the extent descriptor from + FSEG_FULL to FSEG_NOT_FULL list by validating the + FLST_PREV, FLST_NEXT of current extent descriptor + and last extent descriptor in FSEG_NOT_FULL list + 2) If the number of used pages in extent descriptor is 0 + then move the extent descriptor from FSEG_NOT_FULL to + FSP_FREE list by validating the FLST_PREV, FLST_NEXT + of current extent descriptor and last extent descriptor + in FSP_FREE list + @return error code */ + dberr_t free_from_fseg_prepare() noexcept + { + if (memcmp(m_old_descr, m_inode + FSEG_ID, 8)) + return DB_CORRUPTION; + + uint32_t n_used= xdes_get_n_used(m_old_descr); + if (n_used == 0 || n_used > m_extent_size) + return DB_CORRUPTION; + + buf_block_t *fseg_full_prev= nullptr; + buf_block_t *fseg_full_next= nullptr; + buf_block_t *fseg_not_full_last= nullptr; + buf_block_t *fseg_not_full_prev= nullptr; + buf_block_t *fseg_not_full_next= nullptr; + buf_block_t *fsp_free_last= nullptr; + + dberr_t err= DB_SUCCESS; + + if (n_used == m_extent_size) + { + byte *lst= m_iblock->page.frame + uint16_t(m_ioffset + FSEG_FULL); + if (!mach_read_from_4(lst + FLST_LEN)) + return DB_CORRUPTION; + + err= flst::remove_prepare(*m_old_xdes, m_old_xoffset, m_free_limit, + m_mtr, &fseg_full_prev, + &fseg_full_next); + if (err) return err; + + err= flst::append_prepare(*m_iblock, + uint16_t(FSEG_NOT_FULL + m_ioffset), + m_free_limit, m_mtr, + &fseg_not_full_last); 
+ if (err) return err; + } + else + { + uint32_t not_full_n_used= + mach_read_from_4(m_inode + FSEG_NOT_FULL_N_USED); + if (!not_full_n_used) return DB_CORRUPTION; + } + + if (n_used == 1) + { + byte *lst= m_iblock->page.frame + uint16_t(m_ioffset + FSEG_NOT_FULL); + if (!mach_read_from_4(lst + FLST_LEN)) + return DB_CORRUPTION; + + err= flst::remove_prepare(*m_old_xdes, m_old_xoffset, m_free_limit, + m_mtr, &fseg_not_full_prev, + &fseg_not_full_next); + if (err) return err; + + err= flst::append_prepare(*m_header_block, + FSP_FREE + FSP_HEADER_OFFSET, + m_free_limit, m_mtr, &fsp_free_last); + } + return err; + } + + /** Complete the removal of page from file segment + 1) If the extent is from FSEG_FULL then move the + extent descriptor from FSEG_FULL to FSEG_NOT_FULL + 2) If the extent is from FSEG_NOT_FULL then move the + extent descriptor to FSP_FREE */ + void free_from_fseg_complete() noexcept + { + uint32_t n_used= xdes_get_n_used(m_old_descr); + uint32_t old_page_no= mach_read_from_4(m_old_page_no); + m_mtr->free(*fil_system.sys_space, old_page_no); + xdes_set_free(*m_old_xdes, m_old_descr, + old_page_no % m_extent_size, m_mtr); + + byte* p_not_full = m_inode + FSEG_NOT_FULL_N_USED; + uint32_t not_full_n_used = mach_read_from_4(p_not_full) - 1; + if (n_used == m_extent_size) + { + flst::remove_complete(m_iblock, uint16_t(m_ioffset + FSEG_FULL), + m_old_descr, m_mtr); + flst::append_complete(m_iblock, + uint16_t(m_ioffset + FSEG_NOT_FULL), + m_old_xdes, m_old_xoffset, m_mtr); + not_full_n_used += m_extent_size; + } + m_mtr->write<4>(*m_iblock, p_not_full, not_full_n_used); + + if (n_used == 1) + { + flst::remove_complete(m_iblock, + uint16_t(m_ioffset + FSEG_NOT_FULL), + m_old_descr, m_mtr); + + xdes_set_state(*m_old_xdes, m_old_descr, XDES_FREE, m_mtr); + flst::append_complete(m_header_block, + FSP_HEADER_OFFSET + FSP_FREE, + m_old_xdes, m_old_xoffset, m_mtr); + fil_system.sys_space->free_len++; + } + } +public: + PageOperator(buf_block_t *header_block, 
buf_block_t *iblock, + fseg_inode_t *inode, + uint32_t extent_size, byte* old_page_no, + mtr_t *mtr) : + m_header_block(header_block), + m_iblock(iblock), m_inode(inode), m_extent_size(extent_size), + m_mtr(mtr) + { + if (old_page_no) + memcpy(m_old_page_no, old_page_no, 4); + m_free_limit= mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + + m_header_block->page.frame); + m_seg_id= mach_read_from_8(m_inode + FSEG_ID); + } + + ~PageOperator() + { + for (buf_block_t *old : m_old_pages) + if (old) + { + old->page.hash= nullptr; + buf_block_free(old); + } + } + + + /** Get allocated new block */ + buf_block_t* get_new_block() const noexcept { return m_new_block; } + + /** Prepare the new page allocation from the new given extent + @param new_extent starting page of new extent + @param segment segment allocation + @return error code */ + dberr_t prepare_new_page(uint32_t new_extent, bool segment) noexcept + { + dberr_t err= DB_SUCCESS; + uint32_t size= mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE + + m_header_block->page.frame); + if (new_extent >= size || new_extent >= m_free_limit) + return DB_CORRUPTION; + + uint32_t new_descr_page_no= xdes_calc_descriptor_page(0, new_extent); + m_new_xdes= fsp_get_latched_page(page_id_t{0, new_descr_page_no}, + m_mtr, &err); + if (!m_new_xdes) + return err; + + ut_ad(!m_new_block); + m_ioffset= uint16_t(m_inode - m_iblock->page.frame); + m_need_segment= segment; + m_xoffset= uint16_t(xdes_calc_descriptor_index(0, new_extent) * XDES_SIZE + + XDES_ARR_OFFSET + XDES_FLST_NODE); + m_new_descr= m_new_xdes->page.frame + m_xoffset - XDES_FLST_NODE; + m_new_state= uint32_t(xdes_get_state(m_new_descr)); + uint32_t new_page= 0; + + /* Allocate the new extent and initialize the extent state + with XDES_FSEG/XDES_FREE_FRAG */ + if (m_new_state == XDES_FREE) + { + if (segment) err= initialize_segment_prepare(); + else err= initialize_free_frag_prepare(); + + if (err) return err; +new_page: + new_page= xdes_find_free(m_new_descr); + if 
(new_page == FIL_NULL) + return DB_CORRUPTION; + + new_page+= new_extent; + m_new_block= fsp_page_create(fil_system.sys_space, new_page, m_mtr); + err= save_old_page(m_header_block); + if (err == DB_SUCCESS) + err= save_old_page(m_iblock); + if (err == DB_SUCCESS) + err= save_old_page(m_new_xdes); + if (err == DB_SUCCESS) + err= save_old_page(m_new_block); + return err; + } + + uint32_t n_used= xdes_get_n_used(m_new_descr); + if (n_used == 0 || n_used >= m_extent_size) + return DB_CORRUPTION; + + /* Allocate the page from file segment */ + if (m_seg_id != FIL_NULL && m_new_state == XDES_FSEG && + mach_read_from_8(m_new_descr + XDES_ID) == m_seg_id) + err= alloc_from_fseg_prepare(); + /* Allocate the page from free frag */ + else if (m_new_state == XDES_FREE_FRAG || m_new_state == XDES_FULL_FRAG) + err= alloc_from_free_frag_prepare(); + else return DB_CORRUPTION; + + if (err) return err; + goto new_page; + } + + /** Complete the page allocation from FREE extent descriptor + or XDES_FSEG/XDES_FREE_FRAG extent list */ + void complete_new_page() noexcept + { + if (m_new_state == XDES_FREE) + { + if (m_need_segment) + return initialize_segment_complete(); + return initialize_free_frag_complete(); + } + if (m_new_state == XDES_FSEG) + return alloc_from_fseg_complete(); + return alloc_from_free_frag_complete(); + } + + /** Assign the fragment slot of the index node. + This step should be done after removing the old page + because there is a possiblity that FRAGMENT ARRAY + could be full. 
*/ + void assign_frag_slot() noexcept + { + if ((!m_need_segment && m_new_state == XDES_FREE) || + m_new_state == XDES_FULL_FRAG || + m_new_state == XDES_FREE_FRAG) + fseg_set_nth_frag_page_no(m_inode, m_iblock, + fseg_find_free_frag_page_slot(m_inode), + m_new_block->page.id().page_no(), m_mtr); + } + + /** Restore the page modified during page allocation */ + void restore_old_pages() noexcept + { + for (buf_block_t *old : m_old_pages) + if (old) + memcpy_aligned( + old->page.hash->frame, old->page.frame, srv_page_size); + } + + /** Prepare the steps to remove the page from file segment + (or) fragment extent. + @return error code */ + dberr_t prepare_old_page() noexcept + { + uint32_t old_page_no= mach_read_from_4(m_old_page_no); + uint32_t old_descr_page_no= + xdes_calc_descriptor_page(0, old_page_no); + dberr_t err= DB_SUCCESS; + m_old_xdes= fsp_get_latched_page(page_id_t{0, old_descr_page_no}, + m_mtr, &err); + if (!m_old_xdes) + return err; + + m_old_xoffset= + uint16_t(xdes_calc_descriptor_index(0, old_page_no) * XDES_SIZE + + XDES_ARR_OFFSET + XDES_FLST_NODE); + + m_old_descr= m_old_xdes->page.frame + m_old_xoffset - XDES_FLST_NODE; + m_old_state= uint32_t(xdes_get_state(m_old_descr)); + if (m_old_state == XDES_FREE) + return DB_CORRUPTION; + + if (xdes_is_free(m_old_descr, old_page_no & (m_extent_size -1))) + return DB_CORRUPTION; + + m_ioffset= uint16_t(m_inode - m_iblock->page.frame); + return m_old_state == XDES_FSEG + ? free_from_fseg_prepare() + : free_from_frag_prepare(); + } + + /** Complete the removal of page operation */ + void complete_free_old_page() noexcept + { + return m_old_state == XDES_FSEG + ? free_from_fseg_complete() + : free_from_frag_complete(); + } +}; + + +class IndexDefragmenter final +{ + /** Parent block and its associate offset where + we store the child page number. 
This is stored + in the form of */ + std::unordered_map m_parent_pages; + + dict_index_t &m_index; + + buf_block_t *m_root; + /** Iterate through the page and map the child_page_no + with the parent page and their associate offset + in m_parent_pages + @param block block to be traversed */ + dberr_t get_child_pages(buf_block_t *block) noexcept; + + /** Get the first block for the given level + @param level level + @param mtr mini-transaction + @param cur_page_no first page number for the given level + @return error code or DB_SUCCESS */ + dberr_t get_level_block(uint16_t level, mtr_t *mtr, + uint32_t *cur_page_no) noexcept; + + /** Defragment the level of the index + @param level level to be defragmented + @param mtr mini-transaction + @param space_defrag space defragmenter information + and also responsible for allocating new + segment or page from tablespace + @return error code or DB_SUCCESS */ + dberr_t defragment_level(uint16_t level, mtr_t *mtr, + SpaceDefragmenter *space_defrag) noexcept; + +public: + IndexDefragmenter(dict_index_t &index): m_index(index) {} + + /** Defragment the index with the help of space defragmenter. + 1) Iterate through each level of the index + 2) Find out what are the pages/segment + to be modified for the index. 
+ 3) Allocate the page from the new segment/extent + 4) Copy the to be changed page content to new page + 5) Change the associative pages in the tree with + new page(left, right, parent block) + 6) Do step (4), (5) within single mini-transaction + and commit the mini-transaction + @return error code or DB_SUCCESS */ + dberr_t defragment(SpaceDefragmenter *space_defrag) noexcept; +}; + +class SpaceDefragmenter final +{ + /** Extent is already allocated for defragmentation */ + static constexpr uint32_t XDES_USED= ~0U; + /** Store the extent information in the tablespace */ + std::map m_extent_info; + /** Map of last used extent with early unused extent within + the tablespace */ + std::map m_extent_map; + + /** Collect the extent information from tablespace */ + dberr_t extract_extent_state() noexcept + { + mtr_t mtr; + dberr_t err= DB_SUCCESS; + uint32_t last_descr_page_no= 0; + fil_space_t *space= fil_system.sys_space; + mtr.start(); + mtr.x_lock_space(space); + buf_block_t *last_descr= buf_page_get_gen(page_id_t{space->id, 0}, 0, + RW_S_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, &mtr, + &err); + if (!last_descr) + { +func_exit: + mtr.commit(); + return err; + } + + for (uint32_t xdes_n= 0; xdes_n < space->free_limit; + xdes_n+= m_extent_size) + { + /* Ignore doublewrite buffer extent */ + if (buf_dblwr.is_inside(xdes_n)) + continue; + uint32_t descr_page_no= + xdes_calc_descriptor_page(space->id, xdes_n); + if (descr_page_no != last_descr_page_no) + { + last_descr= buf_page_get_gen(page_id_t{space->id, xdes_n}, + 0, RW_S_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, &mtr, + &err); + if (!last_descr) + goto func_exit; + } + xdes_t *descr= XDES_ARR_OFFSET + XDES_SIZE * + xdes_calc_descriptor_index(0, xdes_n) + last_descr->page.frame; + last_descr_page_no= descr_page_no; + /* Ignore the extent descriptor extent */ + if (xdes_n % srv_page_size == 0 && xdes_get_n_used(descr) == 2) + continue; + m_extent_info[xdes_n]= xdes_get_state(descr); + } + goto func_exit; + } + + /** 
Find the earlier free extent for the given used extent + @param max_limit Find the extent below max limit extent + @return value + @retval FIL_NULL if there is no extent */ + uint32_t find_free_extent(uint32_t max_limit) noexcept + { + for (auto &extent_info : m_extent_info) + { + if (max_limit <= extent_info.first) + return FIL_NULL; + + if (extent_info.second == XDES_FREE) + { + /* Mark the extent as used one */ + extent_info.second = XDES_USED; + return extent_info.first; + } + } + return FIL_NULL; + } + + /** Defragment the indexes */ + dberr_t defragment_index(dict_index_t &index) noexcept + { + IndexDefragmenter index_defrag(index); + return index_defrag.defragment(this); + } + + /** Defragment the table */ + dberr_t defragment_table(const dict_table_t *table) noexcept + { + for (dict_index_t *index= dict_table_get_first_index(table); + index; index= dict_table_get_next_index(index)) + { + dberr_t err= fseg_validate(fil_system.sys_space, index); + if (err == DB_SUCCESS) + err= defragment_index(*index); + + if (err) + { + sql_print_error("InnoDB: Defragmentation of %s in %s failed: %s", + index->name, table->name.m_name, ut_strerr(err)); + return err; + } + } + return DB_SUCCESS; + } +public: + const uint32_t m_extent_size; + + SpaceDefragmenter() noexcept : m_extent_size(FSP_EXTENT_SIZE) {} + + /** Find the new extent for the existing last used extent + Iterate the tablespace from last and find out the free + extent in the beginning of the tablespace */ + dberr_t find_new_extents() noexcept + { + dberr_t err= extract_extent_state(); + if (err) return err; + + uint32_t free_limit= fil_system.sys_space->free_limit; + uint32_t fixed_size= srv_sys_space.get_min_size(); + while (free_limit > fixed_size) + { + uint32_t state= m_extent_info[free_limit]; + + switch (state) { + case XDES_USED: + goto func_exit; + case XDES_FREE: + goto prev_extent; + case XDES_FSEG: + case XDES_FULL_FRAG: + case XDES_FREE_FRAG: + uint32_t dest= find_free_extent(free_limit); + if (dest 
== FIL_NULL) + goto func_exit; + m_extent_map[free_limit]= dest; + break; + } +prev_extent: + free_limit-= FSP_EXTENT_SIZE; + } +func_exit: + if (m_extent_map.empty()) + return DB_SUCCESS_LOCKED_REC; + + sql_print_information("InnoDB: System tablespace defragmentation " + "process starts"); + sql_print_information("InnoDB: Moving the data from extents %" + PRIu32 " through %" PRIu32, + m_extent_map.begin()->first, + m_extent_map.rbegin()->first); + return DB_SUCCESS; + } + + /** Defragment the system tables */ + dberr_t defragment_system_tables() noexcept + { + dberr_t err= defragment_table(dict_sys.sys_tables); + if (err == DB_SUCCESS) + err= defragment_table(dict_sys.sys_columns); + if (err == DB_SUCCESS) + err= defragment_table(dict_sys.sys_indexes); + if (err == DB_SUCCESS) + err= defragment_table(dict_sys.sys_fields); + if (err == DB_SUCCESS) + err= defragment_table(dict_sys.sys_foreign); + if (err == DB_SUCCESS) + err= defragment_table(dict_sys.sys_foreign_cols); + if (err == DB_SUCCESS) + err= defragment_table(dict_sys.sys_virtual); + + if (err == DB_SUCCESS) + sql_print_information("InnoDB: Defragmentation of system " + "tablespace is successful"); + return err; + } + + /** @return extent which replaces the later extent + or same extent if there is no replacement exist */ + uint32_t get_new_extent(uint32_t old_extent) const noexcept + { + auto it= m_extent_map.find(old_extent); + if (it != m_extent_map.end()) + return it->second; + return old_extent; + } + + /** @return state for the given extent */ + uint32_t get_state(uint32_t extent) noexcept + { + return m_extent_info[extent]; + } +}; + +dberr_t IndexDefragmenter::get_child_pages(buf_block_t *block) noexcept +{ + const byte *page= block->page.frame; + const rec_t *rec= page_rec_get_next_low(page + PAGE_OLD_INFIMUM, false); + while (rec != page + PAGE_OLD_SUPREMUM) + { + ulint len; + ulint offset= rec_get_nth_field_offs_old(rec, + rec_get_n_fields_old(rec) - 1, + &len); + if (len != 4) + return 
DB_CORRUPTION; + + if (offset >= srv_page_size) + return DB_CORRUPTION; + + const byte *field= rec + offset; + /* m_parent_pages[child_page_no] = + 1st 32 bit to indicate offset in parent page + 2nd 32 bit to indicate parent page number */ + m_parent_pages[mach_read_from_4(field)]= + uint64_t(page_offset(field)) << 32 | block->page.id().page_no(); + rec= page_rec_get_next_low(rec, false); + } + return DB_SUCCESS; +} + /** Find the page from which the traversal of one B-tree level starts. The descent begins at m_index.page and follows the child page number read from the last field of the first record of each page. It stops on a page whose level equals the requested level, or on a page one level above it after reading its first child page number. @param level B-tree level to locate @param mtr mini-transaction handed to fsp_get_latched_page() @param cur_page_no set to the page number found (written only when the loop ends normally) @return error code; DB_CORRUPTION when the levels do not decrease by one per step or the first record is missing or malformed */ +dberr_t IndexDefragmenter::get_level_block(uint16_t level, mtr_t *mtr, + uint32_t *cur_page_no) noexcept +{ + uint32_t child_page_no= m_index.page; /* the descent starts at the page stored in m_index.page */ + dberr_t err= DB_SUCCESS; + uint16_t prev_level= UINT16_MAX; /* UINT16_MAX means that no page has been visited yet */ + while (1) + { + buf_block_t *block= fsp_get_latched_page(page_id_t{0, child_page_no}, + mtr, &err); + if (!block) + return err; + + page_t *page= buf_block_get_frame(block); + uint16_t cur_level= btr_page_get_level(page); + if (cur_level == level) + break; /* already on the requested level */ + + if (prev_level == UINT16_MAX) + prev_level= cur_level; + else if (prev_level != cur_level + 1) + return DB_CORRUPTION; /* each step down must lower the level by exactly one */ + + const rec_t *rec= page_rec_get_next_low(page + PAGE_OLD_INFIMUM, false); + if (rec && rec != page + PAGE_OLD_SUPREMUM) + { + ulint len; /* length of the last field, which is read as the child page number */ + rec+= rec_get_nth_field_offs_old(rec, rec_get_n_fields_old(rec) - 1, + &len); + if (len != 4 || rec + len - page > page_header_get_field(page, + PAGE_HEAP_TOP)) + return DB_CORRUPTION; /* the field must be 4 bytes long and end at or below PAGE_HEAP_TOP */ + child_page_no= mach_read_from_4(rec); + } + else + return DB_CORRUPTION; + if (cur_level == level + 1) + break; /* child_page_no was read from the parent of the requested level */ + prev_level= cur_level; + } + *cur_page_no= child_page_no; + return err; +} + /** Process the pages of one B-tree level from the page returned by get_level_block() to the last page, following btr_page_get_next(). A page whose extent maps to a different extent in space_defrag is copied to a newly prepared page, the neighbouring and parent pages are updated through AssociatedPages, and the old page is freed through PageOperator. The mini-transaction is committed and restarted between pages. @param level B-tree level to process @param mtr mini-transaction, active on entry @param space_defrag provides m_extent_size, get_state() and get_new_extent() @return DB_SUCCESS or an error code; on a failed prepare step the old pages are restored and the modifications of mtr are discarded */ +dberr_t IndexDefragmenter::defragment_level( + uint16_t level, + mtr_t *mtr, + SpaceDefragmenter *space_defrag) noexcept +{ + uint32_t cur_page_no= FIL_NULL; + dberr_t err= get_level_block(level, mtr, &cur_page_no); + if (err) + return err; + + fil_space_t *const space= fil_system.sys_space; + uint32_t extent_size= space_defrag->m_extent_size; + + buf_block_t *block= fsp_get_latched_page(page_id_t{0, cur_page_no}, + mtr, &err); + if (!block) + return err; + + 
for (;;) + { + page_t *page= buf_block_get_frame(block); + uint32_t next_page_no= btr_page_get_next(page); /* read before the page can be moved or freed */ + uint32_t cur_extent= (cur_page_no / extent_size) * extent_size; /* cur_page_no rounded down to a multiple of extent_size */ + uint32_t old_state= space_defrag->get_state(cur_extent); + + /* NOTE(review): XDES_FREE presumably means that this extent was not selected for moving; confirm against SpaceDefragmenter::get_state() */ + if (old_state == XDES_FREE) + { +fetch_next_page: + if (next_page_no == FIL_NULL) + break; + mtr->commit(); /* each following page is handled in a fresh mini-transaction */ + cur_page_no= next_page_no; + + mtr->start(); + mtr->x_lock_space(space); + block= fsp_get_latched_page(page_id_t{0, cur_page_no}, + mtr, &err); + if (!block) + return err; + continue; + } + + uint32_t new_extent= space_defrag->get_new_extent(cur_extent); + /* The extent does not need to change, so the page stays where it is */ + if (new_extent == cur_extent) + { + if (level) + { + /* Record the child page numbers found in the records of this + block, with their offsets, in m_parent_pages */ + err= get_child_pages(block); + if (err) return err; + } + goto fetch_next_page; + } + + buf_block_t *header_block= + fsp_get_latched_page(page_id_t{0, 0}, mtr, &err); + if (!header_block) + return err; + + const fseg_header_t *seg_header= m_root->page.frame + + (level ? 
PAGE_HEADER + PAGE_BTR_SEG_TOP + : PAGE_HEADER + PAGE_BTR_SEG_LEAF); /* non-leaf levels use the PAGE_BTR_SEG_TOP header of the root page, level 0 uses PAGE_BTR_SEG_LEAF */ + + buf_block_t *iblock; + fseg_inode_t *inode= fseg_inode_try_get(seg_header, 0, 0, mtr, + &iblock, &err); + if (!inode) + return err; + + /* NOTE(review): entries are added by get_child_pages() for child pages only, so the root page has no entry here; this presumably relies on the root page never being selected for relocation. Confirm. */ + auto parent_it= m_parent_pages.find(cur_page_no); + if (parent_it == m_parent_pages.end()) + { + err= DB_CORRUPTION; + return err; + } + + uint32_t parent_page_no= uint32_t(parent_it->second); /* low 32 bits: parent page number */ + + uint32_t parent_offset= uint32_t(parent_it->second >> 32); /* high 32 bits: offset within the parent page */ + + if (parent_offset >= srv_page_size - FIL_PAGE_DATA_END) + { + err= DB_CORRUPTION; + return err; + } + + PageOperator operation(header_block, iblock, inode, extent_size, + page + FIL_PAGE_OFFSET, mtr); + + AssociatedPages related_pages(block, mtr); + + err= operation.prepare_new_page(new_extent, old_state == XDES_FSEG); + + DBUG_EXECUTE_IF("allocation_prepare_fail", err= DB_CORRUPTION;); + if (err) + { +err_exit: + operation.restore_old_pages(); /* shared failure path of all prepare steps */ + mtr->discard_modifications(); + return err; + } + + err= related_pages.prepare(parent_page_no); + DBUG_EXECUTE_IF("relation_page_prepare_fail", err= DB_CORRUPTION;); + + if (err) goto err_exit; + + operation.complete_new_page(); + + /* After allocating the new page, try to prepare the steps + of page removal function. Because there is a possibility that + last block in FSEG_NOT_FULL/FSP_FREE_FRAG/FSP_FREE last block + could've changed while allocating the new block. */ + err= operation.prepare_old_page(); + + DBUG_EXECUTE_IF("remover_prepare_fail", err= DB_CORRUPTION;); + if (err) goto err_exit; + + /* Copy the data from old block to new block */ + buf_block_t *new_block= operation.get_new_block(); + uint32_t new_page_no= new_block->page.id().page_no(); + /* Copy FIL_PAGE_PREV, FIL_PAGE_NEXT */ + mtr->memcpy(*new_block, + new_block->page.frame + FIL_PAGE_PREV, + block->page.frame + FIL_PAGE_PREV, + page_has_next(block->page.frame) ? 
8 : 4); /* 8 bytes when the page has a next page, otherwise only the first 4 bytes starting at FIL_PAGE_PREV */ + mtr->memcpy(*new_block, new_block->page.frame + FIL_PAGE_TYPE, + block->page.frame + FIL_PAGE_TYPE, + srv_page_size - FIL_PAGE_TYPE - 8); /* everything from FIL_PAGE_TYPE up to the last 8 bytes of the page */ + + /* Assign the new block page number in left, right + and parent block */ + related_pages.complete(new_page_no, parent_offset); + + /* Complete the page free operation */ + operation.complete_free_old_page(); + /* Add the new page in inode fragment array */ + operation.assign_frag_slot(); + + if (level) + { + err= get_child_pages(new_block); /* the children are now recorded against the new page number */ + if (err) return err; + } + goto fetch_next_page; + } + + /* NOTE(review): ut_a() is used here while the other fsp_tablespace_validate() callers in this patch use ut_ad() under UNIV_DEBUG; confirm that validating, and aborting on failure, is intended outside debug builds */ + ut_a(!fsp_tablespace_validate(space, mtr)); + ut_a(!fseg_validate_low(space, &m_index, mtr)); + if (level > 1) /* for levels 1 and 0 the mini-transaction is left open for the caller */ + { + mtr->commit(); + mtr->start(); + mtr->x_lock_space(space); + } + return DB_SUCCESS; +} + /** Defragment every level of the index, from the level of the root page down to level 0. m_index.lock is held in exclusive mode for the whole operation. The root page is buffer-fixed in m_root and released from the mini-transaction right after the fix; it is unfixed before returning. @param space_defrag passed through to defragment_level() @return DB_SUCCESS or the first error encountered */ +dberr_t IndexDefragmenter::defragment(SpaceDefragmenter *space_defrag) noexcept +{ + mtr_t mtr; + mtr.start(); + dberr_t err= DB_SUCCESS; + m_index.lock.x_lock(SRW_LOCK_CALL); + fil_space_t *const space= fil_system.sys_space; + mtr.x_lock_space(space); + m_root= btr_root_block_get(&m_index, RW_S_LATCH, &mtr, &err); + if (!m_root) + { + mtr.commit(); + m_index.lock.x_unlock(); + return err; + } + + m_root->page.fix(); /* keep the root page fixed after it is released from mtr below */ + mtr.release_last_page(); + uint16_t level= btr_page_get_level(m_root->page.frame); + while (1) + { + err= defragment_level(level, &mtr, space_defrag); + DBUG_EXECUTE_IF("fail_after_level_defragment", + if (m_index.table->id == 2 && level == 1) + err= DB_CORRUPTION;); + if (err || !level) + break; + level--; + } + ut_ad(err == DB_SUCCESS || !mtr.has_modifications()); + mtr.commit(); + m_index.lock.x_unlock(); + m_root->page.unfix(); + return err; +} + +/** Check whether any user table exists in the system tablespace +@retval DB_SUCCESS_LOCKED_REC if a user table exists +@retval DB_SUCCESS if no user table exists +@retval DB_CORRUPTION if a SYS_TABLES record is delete-marked or malformed */ +static dberr_t user_tables_exists() noexcept +{ + mtr_t mtr; + btr_pcur_t pcur; + dberr_t err= DB_SUCCESS; + mtr.start(); + for (const rec_t *rec= 
dict_startscan_system(&pcur, &mtr, + dict_sys.sys_tables); + rec; rec= dict_getnext_system(&pcur, &mtr)) + { + const byte *field= nullptr; + ulint len= 0; + if (rec_get_deleted_flag(rec, 0)) /* a delete-marked SYS_TABLES record is reported as corruption */ + { +corrupt: + sql_print_error("InnoDB: Encountered corrupted record in SYS_TABLES"); + err= DB_CORRUPTION; + goto func_exit; /* NOTE(review): pcur is closed explicitly only on the user-table path below; confirm whether it must also be closed on this path */ + } + field= rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__SPACE, &len); + if (len != 4) + goto corrupt; + if (mach_read_from_4(field) != 0) + continue; /* the SPACE field is not 0, so the table is not in tablespace 0 */ + field= rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len); + if (len != 8) + goto corrupt; + if (!dict_sys.is_sys_table(mach_read_from_8(field))) + { + err= DB_SUCCESS_LOCKED_REC; /* a table in tablespace 0 that is not a system table */ + btr_pcur_close(&pcur); + goto func_exit; + } + } +func_exit: + mtr.commit(); + return err; +} + /** Defragment the system tablespace. Nothing is done, and DB_SUCCESS is returned, when user_tables_exists() finds a user table in the system tablespace or when find_new_extents() returns DB_SUCCESS_LOCKED_REC. @return DB_SUCCESS or an error code */ +dberr_t fil_space_t::defragment() noexcept +{ + ut_ad(this == fil_system.sys_space); + dberr_t err= user_tables_exists(); + if (err == DB_SUCCESS_LOCKED_REC) + { + sql_print_information( + "InnoDB: User table exists in the system tablespace. " 
+ "Please try to move the data from system tablespace " + "to separate tablespace before defragment the " + "system tablespace."); + return DB_SUCCESS; + } else if (err) { return err; } + + SpaceDefragmenter defragmenter; + err= defragmenter.find_new_extents(); + /* No free extent exists; nothing to do */ + if (err == DB_SUCCESS_LOCKED_REC) + return DB_SUCCESS; + + if (err == DB_SUCCESS) + err= defragmenter.defragment_system_tables(); + return err; +} + void fsp_system_tablespace_truncate(bool shutdown) { ut_ad(!purge_sys.enabled()); @@ -4030,6 +5769,16 @@ void fsp_system_tablespace_truncate(bool shutdown) return; } + if (!shutdown) /* defragmentation is skipped when called with shutdown set */ + { + err= space->defragment(); + if (err) + { + srv_sys_space.set_shrink_fail(); + return; + } + } + mtr_t mtr; mtr.start(); mtr.x_lock_space(space); @@ -4064,7 +5813,11 @@ err_exit: fil_system.set_use_doublewrite(false); buf_block_t *header= nullptr; - ut_ad(!fsp_tablespace_validate(space)); +#ifdef UNIV_DEBUG + mtr.start(); + ut_ad(!fsp_tablespace_validate(space, &mtr)); + mtr.commit(); +#endif /* UNIV_DEBUG */ mtr.start(); mtr.x_lock_space(space); @@ -4090,7 +5843,7 @@ err_exit: UINT32PF " to " UINT32PF " pages", space->size, last_used_extent); - header= fsp_get_latched_xdes_page( + header= fsp_get_latched_page( page_id_t(space->id, 0), &mtr, &err); if (!header) goto err_exit; @@ -4132,7 +5885,11 @@ mtr_max: old_xdes_list.restore(&mtr); mtr.discard_modifications(); mtr.commit(); - ut_ad(!fsp_tablespace_validate(space)); +#ifdef UNIV_DEBUG + mtr.start(); + ut_ad(!fsp_tablespace_validate(space, &mtr)); + mtr.commit(); +#endif /* UNIV_DEBUG */ sql_print_error( "InnoDB: Cannot shrink the system tablespace " "because the mini-transaction log size (%zu bytes) " @@ -4206,7 +5963,7 @@ func_exit: UINT32PF " to " UINT32PF " pages", space->size, last_used_extent); - buf_block_t *header= fsp_get_latched_xdes_page( + buf_block_t *header= fsp_get_latched_page( page_id_t(space->id, 0), &mtr, &err); if (!header) goto func_exit; diff --git 
a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc index ff876801242..11d424ae303 100644 --- a/storage/innobase/fut/fut0lst.cc +++ b/storage/innobase/fut/fut0lst.cc @@ -409,45 +409,56 @@ dberr_t flst_remove(buf_block_t *base, uint16_t boffset, return err; } -#ifdef UNIV_DEBUG /** Validate a file-based list. */ -void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr) +dberr_t flst_validate(const buf_block_t *base, uint16_t boffset, + mtr_t *mtr) noexcept { - ut_ad(boffset < base->physical_size()); + if (boffset >= base->physical_size()) + return DB_CORRUPTION; /* the list base offset must lie within the page */ + ut_ad(mtr->memo_contains_flagged(base, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); const uint32_t len= flst_get_len(base->page.frame + boffset); fil_addr_t addr= flst_get_first(base->page.frame + boffset); + dberr_t err= DB_SUCCESS; /* receives the error of buf_page_get_gen() */ for (uint32_t i= len; i--; ) { - ut_ad(addr.boffset >= FIL_PAGE_DATA); - ut_ad(addr.boffset < base->physical_size() - FIL_PAGE_DATA_END); + if (addr.boffset < FIL_PAGE_DATA || + addr.boffset >= base->physical_size() - FIL_PAGE_DATA_END) + return DB_CORRUPTION; /* the node offset must be at least FIL_PAGE_DATA and below the page size minus FIL_PAGE_DATA_END */ const buf_block_t *b= buf_page_get_gen(page_id_t(base->page.id().space(), addr.page), - base->zip_size(), RW_SX_LATCH, nullptr, BUF_GET, mtr); - ut_ad(b); + base->zip_size(), RW_SX_LATCH, nullptr, BUF_GET, mtr, + &err); + if (!b) + return err; addr= flst_get_next_addr(b->page.frame + addr.boffset); mtr->release_last_page(); } - ut_ad(addr.page == FIL_NULL); + if (addr.page != FIL_NULL) + return DB_CORRUPTION; /* after len steps forward the next address must be FIL_NULL */ addr= flst_get_last(base->page.frame + boffset); for (uint32_t i= len; i--; ) { - ut_ad(addr.boffset >= FIL_PAGE_DATA); - ut_ad(addr.boffset < base->physical_size() - FIL_PAGE_DATA_END); + if (addr.boffset < FIL_PAGE_DATA || + addr.boffset >= base->physical_size() - FIL_PAGE_DATA_END) + return DB_CORRUPTION; /* same offset check for the backward traversal */ const buf_block_t *b= buf_page_get_gen(page_id_t(base->page.id().space(), addr.page), - base->zip_size(), 
RW_SX_LATCH, nullptr, BUF_GET, mtr, + &err); + if (!b) + return err; addr= flst_get_prev_addr(b->page.frame + addr.boffset); mtr->release_last_page(); } - ut_ad(addr.page == FIL_NULL); + if (addr.page != FIL_NULL) + return DB_CORRUPTION; /* after len steps backward the previous address must be FIL_NULL */ + return err; } -#endif diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 751f8744052..afe3f3eedcc 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1504,6 +1504,14 @@ public: bool load_sys_tables() noexcept; /** Create or check system tables on startup */ dberr_t create_or_check_sys_tables() noexcept; + /** Check whether a table id belongs to a system table. Ids 1 to 4 are accepted unconditionally; any other id is compared with the ids of sys_foreign, sys_foreign_cols and sys_virtual. NOTE(review): these three pointers are dereferenced without a null check; confirm that they are always set when this is called. @param table_id table id to test @return whether table_id is treated as a system table */ + bool is_sys_table(table_id_t table_id) const noexcept + { + return (table_id > 0 && table_id <= 4) || + table_id == sys_foreign->id || + table_id == sys_foreign_cols->id || + table_id == sys_virtual->id; + } }; /** the data dictionary cache */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index cd1aa8ca336..4e0744b2207 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1021,6 +1021,12 @@ public: @param shutdown called during slow shutdown @return error code */ dberr_t garbage_collect(bool shutdown); + + /** Move InnoDB system tables closer to the start of + the tablespace. + @return error code + @retval DB_SUCCESS on successful operation */ + dberr_t defragment() noexcept; private: /** @return whether the file is usable for io() */ ATTRIBUTE_COLD bool prepare_acquired() noexcept; diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h index dc8806a5c74..4a701fa0d8b 100644 --- a/storage/innobase/include/fut0lst.h +++ b/storage/innobase/include/fut0lst.h @@ -161,9 +161,8 @@ inline fil_addr_t flst_get_prev_addr(const flst_node_t *node) void flst_write_addr(const buf_block_t &block, byte *faddr, uint32_t page, uint16_t boffset, mtr_t *mtr); -# ifdef UNIV_DEBUG /** Validate a file-based list. 
*/ -void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr); -# endif +dberr_t flst_validate(const buf_block_t *base, uint16_t boffset, + mtr_t *mtr) noexcept; /* now declared in all builds and returns an error code */ #endif /* !UNIV_INNOCHECKSUM */