mirror of
https://github.com/MariaDB/server.git
synced 2026-01-06 05:22:24 +03:00
MDEV-31088 Server freeze due to innodb_change_buffering
A 3-thread deadlock has been frequently observed when using innodb_change_buffering!=none and innodb_file_per_table=0:

(1) ibuf_merge_or_delete_for_page() holding an exclusive latch on the block and waiting for an exclusive tablespace latch in fseg_page_is_allocated()

(2) btr_free_but_not_root() in fseg_free_step() waiting for an exclusive tablespace latch

(3) fsp_alloc_free_page() holding the exclusive tablespace latch and waiting for a latch on the block, which it is reallocating for something else

While this was reproduced using innodb_file_per_table=0, this hang should theoretically be possible in .ibd files as well, when the recovery or cleanup of a failed DROP INDEX or ADD INDEX is executing concurrently with something that involves page allocation.

ibuf_merge_or_delete_for_page(): Avoid invoking fseg_page_is_allocated() when block==nullptr. The call was redundant in this case, and it could cause deadlocks due to latching order violation.

ibuf_read_merge_pages(): Acquire an exclusive tablespace latch before invoking buf_page_get_gen(), which may cause fseg_page_is_allocated() to be invoked in ibuf_merge_or_delete_for_page().

Note: This will not fix all latching order violations in this area! Deadlocks involving ibuf_merge_or_delete_for_page(block!=nullptr) are still possible if the caller is not acquiring an exclusive tablespace latch upfront. This would be the case in any read operation that involves a change buffer merge, such as SELECT, CHECK TABLE, or any DML operation that cannot be buffered in the change buffer.
This commit is contained in:
@@ -2363,6 +2363,7 @@ tablespace_deleted:
|
||||
}
|
||||
|
||||
const ulint zip_size = s->zip_size(), size = s->size;
|
||||
s->x_lock();
|
||||
s->release();
|
||||
mtr_t mtr;
|
||||
|
||||
@@ -2380,13 +2381,17 @@ tablespace_deleted:
|
||||
|| !page_is_leaf(block->page.frame);
|
||||
mtr.commit();
|
||||
if (err == DB_TABLESPACE_DELETED) {
|
||||
s->x_unlock();
|
||||
goto tablespace_deleted;
|
||||
}
|
||||
if (!remove) {
|
||||
s->x_unlock();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
s->x_unlock();
|
||||
|
||||
if (srv_shutdown_state == SRV_SHUTDOWN_NONE
|
||||
|| srv_fast_shutdown) {
|
||||
continue;
|
||||
@@ -2415,7 +2420,7 @@ tablespace_deleted:
|
||||
/* Prevent an infinite loop, by removing entries from
|
||||
the change buffer in the case the bitmap bits were
|
||||
wrongly clear even though buffered changes exist. */
|
||||
ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i]));
|
||||
ibuf_delete_recs(page_id_t(space_id, page_nos[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4193,25 +4198,26 @@ dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
|
||||
|
||||
ibuf_mtr_commit(&mtr);
|
||||
|
||||
if (bitmap_bits
|
||||
&& DB_SUCCESS
|
||||
if (!bitmap_bits) {
|
||||
done:
|
||||
/* No changes are buffered for this page. */
|
||||
space->release();
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
|
||||
if (!block
|
||||
|| DB_SUCCESS
|
||||
== fseg_page_is_allocated(space, page_id.page_no())) {
|
||||
ibuf_mtr_start(&mtr);
|
||||
mtr.set_named_space(space);
|
||||
ibuf_reset_bitmap(block, page_id, zip_size, &mtr);
|
||||
ibuf_mtr_commit(&mtr);
|
||||
bitmap_bits = 0;
|
||||
if (!block
|
||||
|| btr_page_get_index_id(block->page.frame)
|
||||
!= DICT_IBUF_ID_MIN + IBUF_SPACE_ID) {
|
||||
ibuf_delete_recs(page_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (!bitmap_bits) {
|
||||
/* No changes are buffered for this page. */
|
||||
space->release();
|
||||
return DB_SUCCESS;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user