From 03a9366c73263b62a847372ca42fc8a65ae4bc54 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 26 May 2023 16:04:02 +0400 Subject: [PATCH 01/15] Extra tests for MDEV-30483 After upgrade to 10.6 from Mysql 5.7 seeing "InnoDB: Column last_update in table mysql.innodb_table_stats is BINARY(4) NOT NULL but should be INT UNSIGNED NOT NULL" Adding tests demonstrating that columns: - mysql.innodb_table_stats.last_update - mysql.innodb_index_stats.last_update contain sane values close to NOW() rathar than a garbage. Tests cover these three underlying TIMESTAMP data formats: - MariaDB Field_timestamp0 - UINT4 based Like in a MariaDB native installation running with mysql56_temporal_format=0 - MariaDB Field_timestampf - BINARY(4) based, with UNSIGNED_FLAG Like in a MariaDB native installation running with mysql56_temporal_format=1 - MySQL-alike Field_timestampf - BINARY(4) based, without UNSIGNED_FLAG Like with a MariaDB server running over a MySQL-5.6 directory (e.g. during a migragion). --- mysql-test/suite/innodb/r/stat_tables.result | 56 +++++++++++++++++++ mysql-test/suite/innodb/t/stat_tables.test | 53 ++++++++++++++++++ .../type_mysql_timestamp_stat_tables.result | 35 ++++++++++++ .../type_mysql_timestamp_stat_tables.test | 38 +++++++++++++ 4 files changed, 182 insertions(+) diff --git a/mysql-test/suite/innodb/r/stat_tables.result b/mysql-test/suite/innodb/r/stat_tables.result index c1ce6fc8fce..d91b854d347 100644 --- a/mysql-test/suite/innodb/r/stat_tables.result +++ b/mysql-test/suite/innodb/r/stat_tables.result @@ -26,4 +26,60 @@ UPDATE mysql.innodb_table_stats SET last_update=NULL WHERE table_name='t1'; XA END 'test'; XA ROLLBACK 'test'; DROP TABLE t1; +# +# MDEV-30483 After upgrade to 10.6 from Mysql 5.7 seeing "InnoDB: Column last_update in table mysql.innodb_table_stats is BINARY(4) NOT NULL but should be INT UNSIGNED NOT NULL" +# +# +# Testing a non-default format: Field_timestamp0 - UINT4 based +# +SET @@global.mysql56_temporal_format=0; +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; +SHOW COLUMNS FROM mysql.innodb_table_stats LIKE 'last_update'; +Field Type Null Key Default Extra +last_update timestamp /* mariadb-5.3 */ NO current_timestamp() on update current_timestamp() +SHOW COLUMNS FROM mysql.innodb_index_stats LIKE 'last_update'; +Field Type Null Key Default Extra +last_update timestamp /* mariadb-5.3 */ NO current_timestamp() on update current_timestamp() +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=1; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +TIMESTAMPDIFF(DAY,last_update,now())<=1 +1 +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +TIMESTAMPDIFF(DAY,last_update,now())<=1 +1 +DROP TABLE t1; +# +# Now as the table t1 is dropped, expect no statistics +# +SELECT * FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +database_name table_name last_update n_rows clustered_index_size sum_of_other_index_sizes +SELECT * FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +database_name table_name index_name last_update stat_name stat_value sample_size stat_description +# +# Testing with the default format: Field_timestampf - BINARY(4) based with the UNSIGNED_FLAG +# +SET @@global.mysql56_temporal_format=1; +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; +SHOW COLUMNS FROM mysql.innodb_table_stats LIKE 'last_update'; +Field Type Null Key Default Extra +last_update timestamp NO current_timestamp() on update current_timestamp() +SHOW COLUMNS FROM mysql.innodb_index_stats LIKE 'last_update'; +Field Type Null Key Default Extra +last_update timestamp NO current_timestamp() on update current_timestamp() +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=1; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +TIMESTAMPDIFF(DAY,last_update,now())<=1 +1 +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +TIMESTAMPDIFF(DAY,last_update,now())<=1 +1 +DROP TABLE t1; # End of 10.6 tests diff --git a/mysql-test/suite/innodb/t/stat_tables.test b/mysql-test/suite/innodb/t/stat_tables.test index dd18c265e99..359fb1e00ec 100644 --- a/mysql-test/suite/innodb/t/stat_tables.test +++ b/mysql-test/suite/innodb/t/stat_tables.test @@ -28,4 +28,57 @@ XA END 'test'; XA ROLLBACK 'test'; DROP TABLE t1; +--echo # +--echo # MDEV-30483 After upgrade to 10.6 from Mysql 5.7 seeing "InnoDB: Column last_update in table mysql.innodb_table_stats is BINARY(4) NOT NULL but should be INT UNSIGNED NOT NULL" +--echo # + +# The following tests demonstrate that these columns: +# - innodb_table_stats.last_update +# - innodb_index_stats.last_update +# have sane values close to NOW(), rather than any garbage, +# with all TIMESTAMP formats. + +--echo # +--echo # Testing a non-default format: Field_timestamp0 - UINT4 based +--echo # + +SET @@global.mysql56_temporal_format=0; +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; +SHOW COLUMNS FROM mysql.innodb_table_stats LIKE 'last_update'; +SHOW COLUMNS FROM mysql.innodb_index_stats LIKE 'last_update'; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=1; + +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +DROP TABLE t1; + +--echo # +--echo # Now as the table t1 is dropped, expect no statistics +--echo # + +SELECT * FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +SELECT * FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; + +--echo # +--echo # Testing with the default format: Field_timestampf - BINARY(4) based with the UNSIGNED_FLAG +--echo # + +SET @@global.mysql56_temporal_format=1; +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; +SHOW COLUMNS FROM mysql.innodb_table_stats LIKE 'last_update'; +SHOW COLUMNS FROM mysql.innodb_index_stats LIKE 'last_update'; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=1; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +DROP TABLE t1; + + --echo # End of 10.6 tests diff --git a/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.result b/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.result index e48f29c9a26..381daff5497 100644 --- a/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.result +++ b/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.result @@ -71,3 +71,38 @@ innodb_index_stats CREATE TABLE `innodb_index_stats` ( PRIMARY KEY (`database_name`,`table_name`,`index_name`,`stat_name`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin STATS_PERSISTENT=0 SET @@global.innodb_stats_persistent=1; +# +# Testing MySQL-5.6-alike Field_timestampf: BINARY(4) based, without UNSIGNED_FLAG +# +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TYPE_MYSQL_TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TYPE_MYSQL_TIMESTAMP; +SHOW COLUMNS FROM mysql.innodb_table_stats LIKE 'last_update'; +Field Type Null Key Default Extra +last_update type_mysql_timestamp NO current_timestamp() on update current_timestamp() +SHOW COLUMNS FROM mysql.innodb_index_stats LIKE 'last_update'; +Field Type Null Key Default Extra +last_update type_mysql_timestamp NO current_timestamp() on update current_timestamp() +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=1; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +TIMESTAMPDIFF(DAY,last_update,now())<=1 +1 +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +TIMESTAMPDIFF(DAY,last_update,now())<=1 +1 +DROP TABLE t1; +# +# Now as the table t1 is dropped, expect no statistics +# +SELECT * FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +database_name table_name last_update n_rows clustered_index_size sum_of_other_index_sizes +SELECT * FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +database_name table_name index_name last_update stat_name stat_value sample_size stat_description +# +# Restore the structure of the tables +# +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; diff --git a/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.test b/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.test index d22c94c6f82..22d8ed4e97e 100644 --- a/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.test +++ b/plugin/type_mysql_timestamp/mysql-test/type_mysql_timestamp/type_mysql_timestamp_stat_tables.test @@ -22,3 +22,41 @@ SHOW CREATE TABLE mysql.innodb_table_stats; ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; SHOW CREATE TABLE mysql.innodb_index_stats; SET @@global.innodb_stats_persistent=1; + + +# The following test demonstrate that these columns: +# - innodb_table_stats.last_update +# - innodb_index_stats.last_update +# have sane values close to NOW(), rather than any garbage, +# with MySQL-alike Field_timestampf + +--echo # +--echo # Testing MySQL-5.6-alike Field_timestampf: BINARY(4) based, without UNSIGNED_FLAG +--echo # + +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TYPE_MYSQL_TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TYPE_MYSQL_TIMESTAMP; +SHOW COLUMNS FROM mysql.innodb_table_stats LIKE 'last_update'; +SHOW COLUMNS FROM mysql.innodb_index_stats LIKE 'last_update'; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=1; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +SELECT TIMESTAMPDIFF(DAY,last_update,now())<=1 FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; +DROP TABLE t1; + +--echo # +--echo # Now as the table t1 is dropped, expect no statistics +--echo # + +SELECT * FROM mysql.innodb_table_stats +WHERE database_name='test' AND table_name='t1'; +SELECT * FROM mysql.innodb_index_stats +WHERE database_name='test' AND table_name='t1' AND stat_name='size'; + +--echo # +--echo # Restore the structure of the tables +--echo # + +ALTER TABLE mysql.innodb_table_stats MODIFY last_update TIMESTAMP; +ALTER TABLE mysql.innodb_index_stats MODIFY last_update TIMESTAMP; From db8765500eaa83464106c7d68be2c647e034fef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 26 May 2023 16:16:10 +0300 Subject: [PATCH 02/15] MDEV-31343 Another server hang with innodb_undo_log_truncate=ON trx_purge_truncate_history(): While waiting for a write-fixed block to become available, simply wait for an exclusive latch on it. Also, simplify the iteration: first check for oldest_modification>2 (to ignore clean pages or pages belonging to the temporary tablespace) and then compare the tablespace identifier. Before releasing buf_pool.flush_list_mutex we will buffer-fix the block of interest. In that way, buf_page_t::can_relocate() will not hold on the block and it must remain in the buffer pool until we have acquired an exclusive latch on it. If the block is still dirty, we will register it with the tablespace truncation mini-transaction; else, we will simply release the latch and buffer-fix and move to the next block. This also reverts commit c4d79399895827c592d12b7be4b7ef21443d3a0f because that fix should no longer be necessary; the wait for an exclusive block latch should allow buf_pool_t::release_freed_page() on the same block to proceed. Tested by: Axel Schwenke, Matthias Leich --- storage/innobase/trx/trx0purge.cc | 42 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 239dcfb6438..3cb5e9574fb 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -685,6 +685,7 @@ not_free: mtr_t mtr; mtr.start(); mtr.x_lock_space(&space); + const auto space_id= space.id; /* Lock all modified pages of the tablespace. @@ -694,8 +695,8 @@ not_free: mini-transaction commit and the server was killed, then discarding the to-be-trimmed pages without flushing would break crash recovery. */ + rescan: mysql_mutex_lock(&buf_pool.flush_list_mutex); - for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; ) { ut_ad(bpage->oldest_modification()); @@ -703,46 +704,47 @@ not_free: buf_page_t *prev= UT_LIST_GET_PREV(list, bpage); - if (bpage->id().space() == space.id && - bpage->oldest_modification() != 1) + if (bpage->oldest_modification() > 2 && bpage->id().space() == space_id) { ut_ad(bpage->frame); - auto block= reinterpret_cast(bpage); - if (!bpage->lock.x_lock_try()) + bpage->fix(); { - rescan: - /* Let buf_pool_t::release_freed_page() proceed. */ + /* Try to acquire an exclusive latch while the cache line is + fresh after fix(). */ + const bool got_lock{bpage->lock.x_lock_try()}; + buf_pool.flush_hp.set(prev); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - mysql_mutex_lock(&buf_pool.mutex); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - mysql_mutex_unlock(&buf_pool.mutex); - bpage= UT_LIST_GET_LAST(buf_pool.flush_list); - continue; + if (!got_lock) + bpage->lock.x_lock(); } - buf_pool.flush_hp.set(prev); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); #ifdef BTR_CUR_HASH_ADAPT - ut_ad(!block->index); /* There is no AHI on undo tablespaces. */ + /* There is no AHI on undo tablespaces. */ + ut_ad(!reinterpret_cast(bpage)->index); #endif - bpage->fix(); ut_ad(!bpage->is_io_fixed()); - mysql_mutex_lock(&buf_pool.flush_list_mutex); + ut_ad(bpage->id().space() == space_id); - if (bpage->oldest_modification() > 1) + if (bpage->oldest_modification() > 2) { + mtr.memo_push(reinterpret_cast(bpage), + MTR_MEMO_PAGE_X_FIX); + mysql_mutex_lock(&buf_pool.flush_list_mutex); + ut_ad(bpage->oldest_modification() > 2); bpage->reset_oldest_modification(); - mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); } else { bpage->unfix(); bpage->lock.x_unlock(); + mysql_mutex_lock(&buf_pool.flush_list_mutex); } if (prev != buf_pool.flush_hp.get()) - /* Rescan, because we may have lost the position. */ + { + mysql_mutex_unlock(&buf_pool.flush_list_mutex); goto rescan; + } } bpage= prev; From e38c075aa0fd7527698430431f8629b01accf980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 26 May 2023 16:39:46 +0300 Subject: [PATCH 03/15] MDEV-31346 trx_purge_add_undo_to_history() is not optimal trx_undo_set_state_at_finish(): Merge to its only caller, trx_purge_add_undo_to_history(). trx_purge_add_undo_to_history(): Evaluate the condition related to TRX_UNDO_STATE only once. Tested by: Matthias Leich --- storage/innobase/include/trx0undo.h | 8 -- storage/innobase/trx/trx0purge.cc | 204 ++++++++++++++-------------- storage/innobase/trx/trx0undo.cc | 31 ----- 3 files changed, 99 insertions(+), 144 deletions(-) diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 670fe00c25b..c1435930551 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -216,14 +216,6 @@ buf_block_t* trx_undo_assign_low(trx_t *trx, trx_rseg_t *rseg, trx_undo_t **undo, mtr_t *mtr, dberr_t *err) MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Sets the state of the undo log segment at a transaction finish. -@return undo log segment header page, x-latched */ -buf_block_t* -trx_undo_set_state_at_finish( -/*=========================*/ - trx_undo_t* undo, /*!< in: undo log memory copy */ - mtr_t* mtr); /*!< in: mtr */ /** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK. @param[in,out] trx transaction diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 3cb5e9574fb..7ce6d4ea61d 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -246,127 +246,121 @@ Remove the undo log segment from the rseg slot if it is too big for reuse. void trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) { - DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")", - trx->id, trx_id_t{trx->rw_trx_hash_element->no})); - ut_ad(undo == trx->rsegs.m_redo.undo); - trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; - ut_ad(undo->rseg == rseg); - buf_block_t* rseg_header = rseg->get(mtr, nullptr); - /* We are in transaction commit; we cannot return an error. If the - database is corrupted, it is better to crash it than to - intentionally violate ACID by committing something that is known to - be corrupted. */ - ut_ad(rseg_header); - buf_block_t* undo_page = trx_undo_set_state_at_finish( - undo, mtr); - trx_ulogf_t* undo_header = undo_page->page.frame - + undo->hdr_offset; + DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")", + trx->id, trx_id_t{trx->rw_trx_hash_element->no})); + ut_ad(undo->id < TRX_RSEG_N_SLOTS); + ut_ad(undo == trx->rsegs.m_redo.undo); + trx_rseg_t *rseg= trx->rsegs.m_redo.rseg; + ut_ad(undo->rseg == rseg); + buf_block_t *rseg_header= rseg->get(mtr, nullptr); + /* We are in transaction commit; we cannot return an error. If the + database is corrupted, it is better to crash it than to + intentionally violate ACID by committing something that is known to + be corrupted. */ + ut_ad(rseg_header); + buf_block_t *undo_page= + buf_page_get(page_id_t(rseg->space->id, undo->hdr_page_no), 0, + RW_X_LATCH, mtr); + /* This function is invoked during transaction commit, which is not + allowed to fail. If we get a corrupted undo header, we will crash here. */ + ut_a(undo_page); + trx_ulogf_t *undo_header= undo_page->page.frame + undo->hdr_offset; - ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1); - ut_ad(rseg->needs_purge > trx->id); + ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1); + ut_ad(rseg->needs_purge > trx->id); - if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rseg_header->page.frame))) { - /* This database must have been upgraded from - before MariaDB 10.3.5. */ - trx_rseg_format_upgrade(rseg_header, mtr); - } + if (rseg->last_page_no == FIL_NULL) + { + rseg->last_page_no= undo->hdr_page_no; + rseg->set_last_commit(undo->hdr_offset, trx->rw_trx_hash_element->no); + } - if (undo->state != TRX_UNDO_CACHED) { - /* The undo log segment will not be reused */ - ut_a(undo->id < TRX_RSEG_N_SLOTS); - static_assert(FIL_NULL == 0xffffffff, ""); - mtr->memset(rseg_header, - TRX_RSEG + TRX_RSEG_UNDO_SLOTS - + undo->id * TRX_RSEG_SLOT_SIZE, 4, 0xff); + rseg->history_size++; - uint32_t hist_size = mach_read_from_4( - TRX_RSEG_HISTORY_SIZE + TRX_RSEG - + rseg_header->page.frame); + if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rseg_header->page.frame))) + /* This database must have been upgraded from before MariaDB 10.3.5. */ + trx_rseg_format_upgrade(rseg_header, mtr); - ut_ad(undo->size == flst_get_len(TRX_UNDO_SEG_HDR - + TRX_UNDO_PAGE_LIST - + undo_page->page.frame)); + uint16_t undo_state; - mtr->write<4>(*rseg_header, TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rseg_header->page.frame, - hist_size + undo->size); - mtr->write<8>(*rseg_header, TRX_RSEG + TRX_RSEG_MAX_TRX_ID - + rseg_header->page.frame, - trx_sys.get_max_trx_id()); - } + if (undo->size == 1 && + TRX_UNDO_PAGE_REUSE_LIMIT > + mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + + undo_page->page.frame)) + { + undo->state= undo_state= TRX_UNDO_CACHED; + UT_LIST_ADD_FIRST(rseg->undo_cached, undo); + } + else + { + ut_ad(undo->size == flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + + undo_page->page.frame)); + /* The undo log segment will not be reused */ + static_assert(FIL_NULL == 0xffffffff, ""); + mtr->memset(rseg_header, TRX_RSEG + TRX_RSEG_UNDO_SLOTS + + undo->id * TRX_RSEG_SLOT_SIZE, 4, 0xff); + uint32_t hist_size= mach_read_from_4(TRX_RSEG_HISTORY_SIZE + TRX_RSEG + + rseg_header->page.frame); + mtr->write<4>(*rseg_header, TRX_RSEG + TRX_RSEG_HISTORY_SIZE + + rseg_header->page.frame, hist_size + undo->size); + mtr->write<8>(*rseg_header, TRX_RSEG + TRX_RSEG_MAX_TRX_ID + + rseg_header->page.frame, trx_sys.get_max_trx_id()); + ut_free(undo); + undo_state= TRX_UNDO_TO_PURGE; + } - /* After the purge thread has been given permission to exit, - we may roll back transactions (trx->undo_no==0) - in THD::cleanup() invoked from unlink_thd() in fast shutdown, - or in trx_rollback_recovered() in slow shutdown. + undo= nullptr; - Before any transaction-generating background threads or the - purge have been started, we can - start transactions in row_merge_drop_temp_indexes(), - and roll back recovered transactions. + /* After the purge thread has been given permission to exit, + we may roll back transactions (trx->undo_no==0) + in THD::cleanup() invoked from unlink_thd() in fast shutdown, + or in trx_rollback_recovered() in slow shutdown. - Arbitrary user transactions may be executed when all the undo log - related background processes (including purge) are disabled due to - innodb_force_recovery=2 or innodb_force_recovery=3. - DROP TABLE may be executed at any innodb_force_recovery level. + Before any transaction-generating background threads or the purge + have been started, we can start transactions in + row_merge_drop_temp_indexes(), and roll back recovered transactions. - During fast shutdown, we may also continue to execute - user transactions. */ - ut_ad(srv_undo_sources - || trx->undo_no == 0 - || (!purge_sys.enabled() - && (srv_is_being_started - || trx_rollback_is_active - || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND)) - || srv_fast_shutdown); + Arbitrary user transactions may be executed when all the undo log + related background processes (including purge) are disabled due to + innodb_force_recovery=2 or innodb_force_recovery=3. DROP TABLE may + be executed at any innodb_force_recovery level. -#ifdef WITH_WSREP - if (wsrep_is_wsrep_xid(&trx->xid)) { - trx_rseg_update_wsrep_checkpoint(rseg_header, &trx->xid, mtr); - } + During fast shutdown, we may also continue to execute user + transactions. */ + ut_ad(srv_undo_sources || trx->undo_no == 0 || + (!purge_sys.enabled() && + (srv_is_being_started || + trx_rollback_is_active || + srv_force_recovery >= SRV_FORCE_NO_BACKGROUND)) || + srv_fast_shutdown); + +#ifdef WITH_WSREP + if (wsrep_is_wsrep_xid(&trx->xid)) + trx_rseg_update_wsrep_checkpoint(rseg_header, &trx->xid, mtr); #endif - if (trx->mysql_log_file_name && *trx->mysql_log_file_name) { - /* Update the latest MySQL binlog name and offset info - in rollback segment header if MySQL binlogging is on - or the database server is a MySQL replication save. */ - trx_rseg_update_binlog_offset(rseg_header, trx, mtr); - } + if (trx->mysql_log_file_name && *trx->mysql_log_file_name) + /* Update the latest binlog name and offset if log_bin=ON or this + is a replica. */ + trx_rseg_update_binlog_offset(rseg_header, trx, mtr); - /* Add the log as the first in the history list */ + /* Add the log as the first in the history list */ - /* We are in transaction commit; we cannot return an error - when detecting corruption. It is better to crash the server - than to intentionally violate ACID by committing something - that is known to be corrupted. */ - ut_a(flst_add_first(rseg_header, TRX_RSEG + TRX_RSEG_HISTORY, undo_page, - static_cast(undo->hdr_offset - + TRX_UNDO_HISTORY_NODE), - mtr) == DB_SUCCESS); + /* We are in transaction commit; we cannot return an error + when detecting corruption. It is better to crash the server + than to intentionally violate ACID by committing something + that is known to be corrupted. */ + ut_a(flst_add_first(rseg_header, TRX_RSEG + TRX_RSEG_HISTORY, undo_page, + uint16_t(page_offset(undo_header) + + TRX_UNDO_HISTORY_NODE), mtr) == DB_SUCCESS); - mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page, - undo_header + TRX_UNDO_TRX_NO, - trx->rw_trx_hash_element->no); - mtr->write<2,mtr_t::MAYBE_NOP>(*undo_page, undo_header - + TRX_UNDO_NEEDS_PURGE, 1U); - - if (rseg->last_page_no == FIL_NULL) { - rseg->last_page_no = undo->hdr_page_no; - rseg->set_last_commit(undo->hdr_offset, - trx->rw_trx_hash_element->no); - } - - rseg->history_size++; - - if (undo->state == TRX_UNDO_CACHED) { - UT_LIST_ADD_FIRST(rseg->undo_cached, undo); - } else { - ut_ad(undo->state == TRX_UNDO_TO_PURGE); - ut_free(undo); - } - - undo = NULL; + mtr->write<2>(*undo_page, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + + undo_page->page.frame, undo_state); + mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page, undo_header + TRX_UNDO_TRX_NO, + trx->rw_trx_hash_element->no); + mtr->write<2,mtr_t::MAYBE_NOP>(*undo_page, undo_header + + TRX_UNDO_NEEDS_PURGE, 1U); } /** Free an undo log segment. diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 20434d9fb9c..4811d2380aa 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -1463,37 +1463,6 @@ template buf_block_t* trx_undo_assign_low(trx_t *trx, trx_rseg_t *rseg, trx_undo_t **undo, mtr_t *mtr, dberr_t *err); -/******************************************************************//** -Sets the state of the undo log segment at a transaction finish. -@return undo log segment header page, x-latched */ -buf_block_t* -trx_undo_set_state_at_finish( -/*=========================*/ - trx_undo_t* undo, /*!< in: undo log memory copy */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(undo->id < TRX_RSEG_N_SLOTS); - ut_ad(undo->rseg->is_persistent()); - - buf_block_t *block= - buf_page_get(page_id_t(undo->rseg->space->id, undo->hdr_page_no), 0, - RW_X_LATCH, mtr); - /* This function is invoked during transaction commit, which is not - allowed to fail. If we get a corrupted undo header, we will crash here. */ - ut_a(block); - const uint16_t state = undo->size == 1 && - TRX_UNDO_PAGE_REUSE_LIMIT > - mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + - block->page.frame) - ? TRX_UNDO_CACHED - : TRX_UNDO_TO_PURGE; - - undo->state= state; - mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + block->page.frame, - state); - return block; -} - /** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK. @param[in,out] trx transaction @param[in,out] undo undo log From 7b72fc0a57ec594f70837bde470048a0d55383d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 26 May 2023 16:40:02 +0300 Subject: [PATCH 04/15] MDEV-22739 !cursor->index->is_committed() in row0ins.cc row_ins_sec_index_entry_by_modify(): When noticing a corrupted secondary index on which CREATE INDEX is not in progress, return DB_CORRUPTION instead of intentionally crashing the server. Tested by: Matthias Leich --- storage/innobase/row/row0ins.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index bd998094f42..3188c8c27ce 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -214,14 +214,14 @@ row_ins_sec_index_entry_by_modify( made to the clustered index, and completed the secondary index creation before we got here. In this case, the change would already be there. The CREATE - INDEX should be waiting for a MySQL meta-data lock - upgrade at least until this INSERT or UPDATE - returns. After that point, set_committed(true) - would be invoked in commit_inplace_alter_table(). */ + INDEX should be in wait_while_table_is_used() at least + until this INSERT or UPDATE returns. After that point, + set_committed(true) would be invoked in + commit_inplace_alter_table(). */ ut_a(update->n_fields == 0); - ut_a(!cursor->index()->is_committed()); ut_ad(!dict_index_is_online_ddl(cursor->index())); - return(DB_SUCCESS); + return cursor->index()->is_committed() + ? DB_CORRUPTION : DB_SUCCESS; } if (mode == BTR_MODIFY_LEAF) { From ce547cfc0564bf54e73bcc5171a2212d6650eb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 26 May 2023 16:40:07 +0300 Subject: [PATCH 05/15] MDEV-31350: Hang in innodb.recovery_memory buf_flush_page_cleaner(): Whenever buf_pool.ran_out(), invoke buf_pool.get_oldest_modification(0) so that all clean blocks will be removed from buf_pool.flush_list and buf_flush_LRU_list_batch() will be able to evict some pages. This fixes a regression that was likely caused by commit a55b951e6082a4ce9a1f2ed5ee176ea7dbbaf1f2 (MDEV-26827). --- storage/innobase/buf/buf0flu.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 67c79702ec8..9eeecbd1302 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2330,6 +2330,7 @@ static void buf_flush_page_cleaner() else if (buf_pool.ran_out()) { buf_pool.page_cleaner_set_idle(false); + buf_pool.get_oldest_modification(0); mysql_mutex_unlock(&buf_pool.flush_list_mutex); n= srv_max_io_capacity; mysql_mutex_lock(&buf_pool.mutex); From ea66df2f454a09f8182b0951723ef63dcee2332a Mon Sep 17 00:00:00 2001 From: Otto Kekalainen Date: Sat, 27 May 2023 22:42:59 -0700 Subject: [PATCH 06/15] Deb: Fix blocksize check to use $mysql_datadir/$datadir correctly In commit f99a8918 this line was changed to not use awk, and new version copied both to init file and preinst file but overlooking that they use different variable names. Also fix minor syntax issues to make Shellcheck happy. All new code of the whole pull request, including one or several files that are either new files or modified ones, are contributed under the BSD-new license. I am contributing on behalf of my employer Amazon Web Services, Inc. --- debian/mariadb-server-10.6.mariadb.init | 2 +- debian/mariadb-server-10.6.preinst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/mariadb-server-10.6.mariadb.init b/debian/mariadb-server-10.6.mariadb.init index b2b7142084f..f68fbc99bc2 100644 --- a/debian/mariadb-server-10.6.mariadb.init +++ b/debian/mariadb-server-10.6.mariadb.init @@ -86,7 +86,7 @@ sanity_checks() { datadir=`mariadbd_get_param datadir` # As preset blocksize of GNU df is 1024 then available bytes is $df_available_blocks * 1024 # 4096 blocks is then lower than 4 MB - df_available_blocks=`LC_ALL=C BLOCKSIZE= df --output=avail "$datadir" | tail -n 1` + df_available_blocks="$(LC_ALL=C BLOCKSIZE='' df --output=avail "$datadir" | tail -n 1)" if [ "$df_available_blocks" -lt "4096" ]; then log_failure_msg "$0: ERROR: The partition with $datadir is too full!" echo "ERROR: The partition with $datadir is too full!" | $ERR_LOGGER diff --git a/debian/mariadb-server-10.6.preinst b/debian/mariadb-server-10.6.preinst index 9a31b5c636e..2513e3ab021 100644 --- a/debian/mariadb-server-10.6.preinst +++ b/debian/mariadb-server-10.6.preinst @@ -226,7 +226,7 @@ fi # As preset blocksize of GNU df is 1024 then available bytes is $df_available_blocks * 1024 # 4096 blocks is then lower than 4 MB -df_available_blocks=`LC_ALL=C BLOCKSIZE= df --output=avail "$datadir" | tail -n 1` +df_available_blocks="$(LC_ALL=C BLOCKSIZE='' df --output=avail "$mysql_datadir" | tail -n 1)" if [ "$df_available_blocks" -lt "4096" ] then echo "ERROR: There's not enough space in $mysql_datadir/" 1>&2 From a6c0a2769663850c055e41a83b0cca9dd9ab89a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 30 May 2023 17:21:49 +0300 Subject: [PATCH 07/15] MDEV-31362 recv_sys_t::apply(bool): Assertion `!last_batch || recovered_lsn == scanned_lsn' failed recv_sys_t::apply(): Remove a bogus debug assertion that had been added in commit f2c17cc9d9bcd634887846d3064bcb71243f9cc0 (MDEV-29911). It is perfectly normal that when the server was killed in the middle of writing multiple redo log blocks, the recovery would end such that recv_sys.scanned_lsn will point to the end of the last complete 512-byte log block, but recv_sys.recovered_lsn will be less than that. Also, correct the function comment of recv_sys_t::parse(). --- storage/innobase/include/log0recv.h | 2 +- storage/innobase/log/log0recv.cc | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 2e4a4bfa794..79e1f862cf5 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -394,7 +394,7 @@ public: bool add(map::iterator it, lsn_t start_lsn, lsn_t lsn, const byte *l, size_t len); - /** Parse and register one mini-transaction in log_t::FORMAT_10_5. + /** Parse and register mini-transactions in log_t::FORMAT_10_5. @param checkpoint_lsn the log sequence number of the latest checkpoint @param store whether to store the records @param apply whether to apply file-level log records diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 1d80345a5e0..2764cc79829 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2396,7 +2396,7 @@ void recv_sys_t::rewind(const byte *end, const byte *begin) noexcept pages_it= pages.end(); } -/** Parse and register one mini-transaction in log_t::FORMAT_10_5. +/** Parse and register mini-transactions in log_t::FORMAT_10_5. @param checkpoint_lsn the log sequence number of the latest checkpoint @param store whether to store the records @param apply whether to apply file-level log records @@ -2404,7 +2404,7 @@ void recv_sys_t::rewind(const byte *end, const byte *begin) noexcept or corruption was noticed */ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t *store, bool apply) { - restart: +restart: mysql_mutex_assert_owner(&log_sys.mutex); mysql_mutex_assert_owner(&mutex); ut_ad(parse_start_lsn); @@ -3626,7 +3626,6 @@ void recv_sys_t::apply(bool last_batch) recv_no_ibuf_operations = !last_batch || srv_operation == SRV_OPERATION_RESTORE || srv_operation == SRV_OPERATION_RESTORE_EXPORT; - ut_ad(!last_batch || recovered_lsn == scanned_lsn); progress_time= time(nullptr); report_progress(); From 30fb72ca6e27742f20003c208985eb711e12c70a Mon Sep 17 00:00:00 2001 From: Tuukka Pasanen Date: Tue, 30 May 2023 09:15:11 +0300 Subject: [PATCH 08/15] MDEV-31331: Fix cut'n'paste variable name in Debian pre-inst script There is unwanted cut'n'paste variable name in Debian pre-inst script which causes: df: '': No such file or directory /var/lib/dpkg/tmp.ci/preinst: line 215: [: : integer expression expected Rename variable to correct one and make check that that directory or symlink really exists. If it does not then fail with error and message. --- debian/mariadb-server-10.6.preinst | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/debian/mariadb-server-10.6.preinst b/debian/mariadb-server-10.6.preinst index 2513e3ab021..0b3babd3ca8 100644 --- a/debian/mariadb-server-10.6.preinst +++ b/debian/mariadb-server-10.6.preinst @@ -224,14 +224,23 @@ then mkdir -Z $mysql_datadir fi -# As preset blocksize of GNU df is 1024 then available bytes is $df_available_blocks * 1024 -# 4096 blocks is then lower than 4 MB -df_available_blocks="$(LC_ALL=C BLOCKSIZE='' df --output=avail "$mysql_datadir" | tail -n 1)" -if [ "$df_available_blocks" -lt "4096" ] +# Check if MariaDB datadir is available if not fails. +# There should be symlink or directory available or something will fail. +if [ -d "$mysql_datadir" ] || [ -L "$mysql_datadir" ] then - echo "ERROR: There's not enough space in $mysql_datadir/" 1>&2 - db_stop - exit 1 + # As preset blocksize of GNU df is 1024 then available bytes is $df_available_blocks * 1024 + # 4096 blocks is then lower than 4 MB + df_available_blocks="$(LC_ALL=C BLOCKSIZE='' df --output=avail "$mysql_datadir" | tail -n 1)" + if [ "$df_available_blocks" -lt "4096" ] + then + echo "ERROR: There's not enough space in $mysql_datadir/" 1>&2 + db_stop + exit 1 + fi +else + echo "ERROR: There's no directory or symlink available: $mysql_datadir/" 1>&2 + db_stop + exit 1 fi # Since the home directory was created before putting the user into From eb20e7c9008b343b2441e9587149ac58de1f221e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 31 May 2023 15:20:54 +0300 Subject: [PATCH 09/15] MDEV-31353 InnoDB recovery hangs after reporting corruption recv_recover_page(): Remove some code which was added in commit 0b47c126e31cddda1e94588799599e138400bcf8 with no good reason and which would cause a hang after a corrupted page was reported during crash recovery. Tested by: Matthias Leich --- storage/innobase/log/log0recv.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 2764cc79829..44680e9e485 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3076,9 +3076,6 @@ set_start_lsn: || recv_sys.is_corrupt_log()) && !srv_force_recovery) { if (init) { init->created = false; - if (space || block->page.id().page_no()) { - block->page.lock.x_lock_recursive(); - } } mtr.discard_modifications(); From e3b06156c6ecd5d3fd4376ee025df1ab45311a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 31 May 2023 15:25:07 +0300 Subject: [PATCH 10/15] MDEV-31347 fil_ibd_create() may hijack the file handle of an old file fil_ibd_create(): Hold fil_system.mutex until fil_node_t::find_metadata() has completed, so that node->handle cannot be closed by a concurrent thread. This race condition was introduced in commit 10dd290b4b8b8b235c8cf42e100f0a4415629e79 (MDEV-17380). Tested by: Matthias Leich --- storage/innobase/fil/fil0fil.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 7bd813b0c0d..e08d8a25171 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2001,8 +2001,8 @@ err_exit: FIL_TYPE_TABLESPACE, crypt_data, mode, true)) { fil_node_t* node = space->add(path, file, size, false, true); - mysql_mutex_unlock(&fil_system.mutex); IF_WIN(node->find_metadata(), node->find_metadata(file, true)); + mysql_mutex_unlock(&fil_system.mutex); mtr.start(); mtr.set_named_space(space); ut_a(fsp_header_init(space, size, &mtr) == DB_SUCCESS); From 5919f7b675835b5a4eec7f921ed024a7ab2de58c Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Wed, 31 May 2023 19:07:41 +0530 Subject: [PATCH 11/15] MDEV-31264 Purge trying to access freed secondary index page - InnoDB purge tries to access aborted secondary index and access the freed secondary index root page. --- .../suite/parts/r/partition_purge.result | 26 +++++++++++++ mysql-test/suite/parts/t/partition_purge.opt | 1 + mysql-test/suite/parts/t/partition_purge.test | 37 +++++++++++++++++++ storage/innobase/row/row0purge.cc | 7 ++++ 4 files changed, 71 insertions(+) create mode 100644 mysql-test/suite/parts/r/partition_purge.result create mode 100644 mysql-test/suite/parts/t/partition_purge.opt create mode 100644 mysql-test/suite/parts/t/partition_purge.test diff --git a/mysql-test/suite/parts/r/partition_purge.result b/mysql-test/suite/parts/r/partition_purge.result new file mode 100644 index 00000000000..072b141cd8d --- /dev/null +++ b/mysql-test/suite/parts/r/partition_purge.result @@ -0,0 +1,26 @@ +CREATE TABLE t1(f1 INT, f2 INT, INDEX(f1))ENGINE=InnoDB +PARTITION BY LIST(f1) ( +PARTITION p1 VALUES in (1, 2, 3), +PARTITION p2 VALUES in (4, 5, 6)); +INSERT INTO t1 VALUES(1, 1), (1, 1), (6, 1); +connect con1,localhost,root,,,; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connect con2,localhost,root,,,; +SET DEBUG_SYNC="innodb_rollback_inplace_alter_table SIGNAL default_resume WAIT_FOR alter_resume"; +ALTER TABLE t1 ADD UNIQUE INDEX(f1); +connection default; +set DEBUG_SYNC="now WAIT_FOR default_resume"; +SET DEBUG_SYNC="innodb_row_update_for_mysql_begin SIGNAL alter_resume WAIT_FOR alter_finish"; +DELETE FROM t1; +connection con2; +ERROR 23000: Duplicate entry '1' for key 'f1_2' +SET DEBUG_SYNC="now SIGNAL alter_finish"; +connection default; +connection con1; +commit; +connection default; +disconnect con1; +disconnect con2; +InnoDB 0 transactions not purged +drop table t1; +SET DEBUG_SYNC=reset; diff --git a/mysql-test/suite/parts/t/partition_purge.opt b/mysql-test/suite/parts/t/partition_purge.opt new file mode 100644 index 00000000000..a39e5228c9d --- /dev/null +++ b/mysql-test/suite/parts/t/partition_purge.opt @@ -0,0 +1 @@ +--innodb_purge_threads=1 diff --git a/mysql-test/suite/parts/t/partition_purge.test b/mysql-test/suite/parts/t/partition_purge.test new file mode 100644 index 00000000000..2df81b0eb77 --- /dev/null +++ b/mysql-test/suite/parts/t/partition_purge.test @@ -0,0 +1,37 @@ +--source include/have_innodb.inc +--source include/have_partition.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +CREATE TABLE t1(f1 INT, f2 INT, INDEX(f1))ENGINE=InnoDB + PARTITION BY LIST(f1) ( + PARTITION p1 VALUES in (1, 2, 3), + PARTITION p2 VALUES in (4, 5, 6)); +INSERT INTO t1 VALUES(1, 1), (1, 1), (6, 1); +connect(con1,localhost,root,,,); +START TRANSACTION WITH CONSISTENT SNAPSHOT; + +connect(con2,localhost,root,,,); +SET DEBUG_SYNC="innodb_rollback_inplace_alter_table SIGNAL default_resume WAIT_FOR alter_resume"; +send ALTER TABLE t1 ADD UNIQUE INDEX(f1); + +connection default; +set DEBUG_SYNC="now WAIT_FOR default_resume"; +SET DEBUG_SYNC="innodb_row_update_for_mysql_begin SIGNAL alter_resume WAIT_FOR alter_finish"; +send DELETE FROM t1; + +connection con2; +--error ER_DUP_ENTRY +reap; +SET DEBUG_SYNC="now SIGNAL alter_finish"; + +connection default; +reap; +connection con1; +commit; +connection default; +disconnect con1; +disconnect con2; +--source ../../innodb/include/wait_all_purged.inc +drop table t1; +SET DEBUG_SYNC=reset; diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index f4db252b069..92403fa739d 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -615,6 +615,8 @@ row_purge_del_mark( const auto type= node->index->type; if (type & (DICT_FTS | DICT_CORRUPT)) continue; + if (node->index->online_status > ONLINE_INDEX_CREATION) + continue; if (UNIV_UNLIKELY(DICT_VIRTUAL & type) && !node->index->is_committed() && node->index->has_new_v_col()) continue; @@ -767,6 +769,11 @@ row_purge_upd_exist_or_extern_func( continue; } + if (node->index->online_status + > ONLINE_INDEX_CREATION) { + continue; + } + if (row_upd_changes_ord_field_binary(node->index, node->update, thr, NULL, NULL)) { /* Build the older version of the index entry */ From 3aea77edeb472753da3c8a57f39fa70c7f960847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 1 Jun 2023 09:41:17 +0300 Subject: [PATCH 12/15] MDEV-31347 fil_ibd_create() may hijack the file handle of an old file fil_space_t::add(): If a file handle was passed, invoke fil_node_t::find_metadata() before releasing fil_system.mutex. The call was moved from fil_ibd_create(). This is a 10.5 version of commit e3b06156c6ecd5d3fd4376ee025df1ab45311a6d from 10.6. --- storage/innobase/fil/fil0fil.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 58261d27d8e..c75144413ac 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -350,6 +350,7 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle, this->size += size; UT_LIST_ADD_LAST(chain, node); if (node->is_open()) { + node->find_metadata(node->handle); n_pending.fetch_and(~CLOSING, std::memory_order_relaxed); if (++fil_system.n_open >= srv_max_n_open_files) { reacquire(); @@ -2433,7 +2434,6 @@ err_exit: mtr.log_file_op(FILE_CREATE, space_id, node->name); mtr.commit(); - node->find_metadata(file); *err = DB_SUCCESS; return space; } From bb9da13baf5e5a4a435408fc05fd46253a00ea69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 1 Jun 2023 12:11:18 +0300 Subject: [PATCH 13/15] MDEV-31373 innodb_undo_log_truncate=ON recovery results in a corrupted undo log recv_sys_t::apply(): When applying an undo log truncation operation, invoke os_file_truncate() on space->recv_size, which must not be less than the original truncated file size. Alternatively, as pointed out by Thirunarayanan Balathandayuthapani, we could assign space->size = t.pages, so that fil_system_t::extend_to_recv_size() would extend the file back to space->recv_size. --- storage/innobase/log/log0recv.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 86e7f43015c..16022c8dd0e 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2690,10 +2690,12 @@ void recv_sys_t::apply(bool last_batch) if (fil_space_t *space = fil_space_get(id + srv_undo_space_id_start)) { ut_ad(UT_LIST_GET_LEN(space->chain) == 1); + ut_ad(space->recv_size >= t.pages); fil_node_t *file= UT_LIST_GET_FIRST(space->chain); ut_ad(file->is_open()); os_file_truncate(file->name, file->handle, - os_offset_t{t.pages} << srv_page_size_shift, true); + os_offset_t{space->recv_size} << + srv_page_size_shift, true); } } } From 8a86df37ef024c85d8a049b237fd331ac09c2683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 2 Jun 2023 10:44:34 +0300 Subject: [PATCH 14/15] MDEV-31088 Server freeze due to innodb_change_buffering A 3-thread deadlock has been frequently observed when using innodb_change_buffering!=none and innodb_file_per_table=0: (1) ibuf_merge_or_delete_for_page() holding an exclusive latch on the block and waiting for an exclusive tablespace latch in fseg_page_is_allocated() (2) btr_free_but_not_root() in fseg_free_step() waiting for an exclusive tablespace latch (3) fsp_alloc_free_page() holding the exclusive tablespace latch and waiting for a latch on the block, which it is reallocating for something else While this was reproduced using innodb_file_per_table=0, this hang should be theoretically possible in .ibd files as well, when the recovery or cleanup of a failed DROP INDEX or ADD INDEX is executing concurrently with something that involves page allocation. ibuf_merge_or_delete_for_page(): Avoid invoking fseg_page_is_allocated() when block==nullptr. The call was redundant in this case, and it could cause deadlocks due to latching order violation. ibuf_read_merge_pages(): Acquire an exclusive tablespace latch before invoking buf_page_get_gen(), which may cause fseg_page_is_allocated() to be invoked in ibuf_merge_or_delete_for_page(). Note: This will not fix all latching order violations in this area! Deadlocks involving ibuf_merge_or_delete_for_page(block!=nullptr) are still possible if the caller is not acquiring an exclusive tablespace latch upfront. This would be the case in any read operation that involves a change buffer merge, such as SELECT, CHECK TABLE, or any DML operation that cannot be buffered in the change buffer. --- storage/innobase/ibuf/ibuf0ibuf.cc | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index a176f5d1bea..d1d6176720a 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -2363,6 +2363,7 @@ tablespace_deleted: } const ulint zip_size = s->zip_size(), size = s->size; + s->x_lock(); s->release(); mtr_t mtr; @@ -2380,13 +2381,17 @@ tablespace_deleted: || !page_is_leaf(block->page.frame); mtr.commit(); if (err == DB_TABLESPACE_DELETED) { + s->x_unlock(); goto tablespace_deleted; } if (!remove) { + s->x_unlock(); continue; } } + s->x_unlock(); + if (srv_shutdown_state == SRV_SHUTDOWN_NONE || srv_fast_shutdown) { continue; @@ -2415,7 +2420,7 @@ tablespace_deleted: /* Prevent an infinite loop, by removing entries from the change buffer in the case the bitmap bits were wrongly clear even though buffered changes exist. */ - ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i])); + ibuf_delete_recs(page_id_t(space_id, page_nos[i])); } } @@ -4193,25 +4198,26 @@ dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block, ibuf_mtr_commit(&mtr); - if (bitmap_bits - && DB_SUCCESS + if (!bitmap_bits) { + done: + /* No changes are buffered for this page. */ + space->release(); + return DB_SUCCESS; + } + + if (!block + || DB_SUCCESS == fseg_page_is_allocated(space, page_id.page_no())) { ibuf_mtr_start(&mtr); mtr.set_named_space(space); ibuf_reset_bitmap(block, page_id, zip_size, &mtr); ibuf_mtr_commit(&mtr); - bitmap_bits = 0; if (!block || btr_page_get_index_id(block->page.frame) != DICT_IBUF_ID_MIN + IBUF_SPACE_ID) { ibuf_delete_recs(page_id); } - } - - if (!bitmap_bits) { - /* No changes are buffered for this page. */ - space->release(); - return DB_SUCCESS; + goto done; } } From f569e06e03a7efa6050258a8d167de0aaa4e124c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 2 Jun 2023 11:06:09 +0300 Subject: [PATCH 15/15] MDEV-31385 Change buffer stale entries leads to corruption while reusing page buf_page_free(): If buffered changes existed for the page, drop them. Co-developed with Thirunarayanan Balathandayuthapani --- storage/innobase/buf/buf0buf.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 693826917c9..90886173b1b 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2151,6 +2151,8 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) } block->page.lock.x_lock(); + if (block->page.is_ibuf_exist()) + ibuf_merge_or_delete_for_page(nullptr, page_id, block->page.zip_size()); #ifdef BTR_CUR_HASH_ADAPT if (block->index) btr_search_drop_page_hash_index(block, false);