From 8a86df37ef024c85d8a049b237fd331ac09c2683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 2 Jun 2023 10:44:34 +0300 Subject: [PATCH] MDEV-31088 Server freeze due to innodb_change_buffering A 3-thread deadlock has been frequently observed when using innodb_change_buffering!=none and innodb_file_per_table=0: (1) ibuf_merge_or_delete_for_page() holding an exclusive latch on the block and waiting for an exclusive tablespace latch in fseg_page_is_allocated() (2) btr_free_but_not_root() in fseg_free_step() waiting for an exclusive tablespace latch (3) fsp_alloc_free_page() holding the exclusive tablespace latch and waiting for a latch on the block, which it is reallocating for something else While this was reproduced using innodb_file_per_table=0, this hang should be theoretically possible in .ibd files as well, when the recovery or cleanup of a failed DROP INDEX or ADD INDEX is executing concurrently with something that involves page allocation. ibuf_merge_or_delete_for_page(): Avoid invoking fseg_page_is_allocated() when block==nullptr. The call was redundant in this case, and it could cause deadlocks due to latching order violation. ibuf_read_merge_pages(): Acquire an exclusive tablespace latch before invoking buf_page_get_gen(), which may cause fseg_page_is_allocated() to be invoked in ibuf_merge_or_delete_for_page(). Note: This will not fix all latching order violations in this area! Deadlocks involving ibuf_merge_or_delete_for_page(block!=nullptr) are still possible if the caller is not acquiring an exclusive tablespace latch upfront. This would be the case in any read operation that involves a change buffer merge, such as SELECT, CHECK TABLE, or any DML operation that cannot be buffered in the change buffer. --- storage/innobase/ibuf/ibuf0ibuf.cc | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index a176f5d1bea..d1d6176720a 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -2363,6 +2363,7 @@ tablespace_deleted: } const ulint zip_size = s->zip_size(), size = s->size; + s->x_lock(); s->release(); mtr_t mtr; @@ -2380,13 +2381,17 @@ tablespace_deleted: || !page_is_leaf(block->page.frame); mtr.commit(); if (err == DB_TABLESPACE_DELETED) { + s->x_unlock(); goto tablespace_deleted; } if (!remove) { + s->x_unlock(); continue; } } + s->x_unlock(); + if (srv_shutdown_state == SRV_SHUTDOWN_NONE || srv_fast_shutdown) { continue; @@ -2415,7 +2420,7 @@ tablespace_deleted: /* Prevent an infinite loop, by removing entries from the change buffer in the case the bitmap bits were wrongly clear even though buffered changes exist. */ - ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i])); + ibuf_delete_recs(page_id_t(space_id, page_nos[i])); } } @@ -4193,25 +4198,26 @@ dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block, ibuf_mtr_commit(&mtr); - if (bitmap_bits - && DB_SUCCESS + if (!bitmap_bits) { + done: + /* No changes are buffered for this page. */ + space->release(); + return DB_SUCCESS; + } + + if (!block + || DB_SUCCESS == fseg_page_is_allocated(space, page_id.page_no())) { ibuf_mtr_start(&mtr); mtr.set_named_space(space); ibuf_reset_bitmap(block, page_id, zip_size, &mtr); ibuf_mtr_commit(&mtr); - bitmap_bits = 0; if (!block || btr_page_get_index_id(block->page.frame) != DICT_IBUF_ID_MIN + IBUF_SPACE_ID) { ibuf_delete_recs(page_id); } - } - - if (!bitmap_bits) { - /* No changes are buffered for this page. */ - space->release(); - return DB_SUCCESS; + goto done; } }