From da3e9edafb51452fbff3aa0ac0e96ed6c55c4464 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Mon, 7 Apr 2025 08:47:46 +0200 Subject: [PATCH] MDEV-34705: Binlog-in-engine: Fix race between reader and flush A reader could latch a page that was currently being flushed to disk, while the flushing thread is temporarily releasing the mutex. If the page was complete with data when the flushing started, the flush thread would not correctly wait for the reader to release the latch, and the page could be freed while the reader was still using it. Also adjust a couple assertions to reflect the addition of the file header page as page 0. Signed-off-by: Kristian Nielsen --- mysql-test/suite/binlog_in_engine/my.cnf | 2 ++ storage/innobase/fsp/fsp_binlog.cc | 24 ++++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/mysql-test/suite/binlog_in_engine/my.cnf b/mysql-test/suite/binlog_in_engine/my.cnf index 35b697ec0c9..b6a2d13c4ea 100644 --- a/mysql-test/suite/binlog_in_engine/my.cnf +++ b/mysql-test/suite/binlog_in_engine/my.cnf @@ -3,6 +3,7 @@ [mysqld.1] innodb binlog-storage-engine=innodb +max_binlog_size=8M innodb-binlog-state-interval=128k log-basename= master @@ -10,6 +11,7 @@ log-basename= master #!use-slave-opt innodb binlog-storage-engine=innodb +max_binlog_size=8M innodb-binlog-state-interval=128k log-slave-updates log-basename= slave diff --git a/storage/innobase/fsp/fsp_binlog.cc b/storage/innobase/fsp/fsp_binlog.cc index 716a4dc0a50..58fdb92f11b 100644 --- a/storage/innobase/fsp/fsp_binlog.cc +++ b/storage/innobase/fsp/fsp_binlog.cc @@ -292,8 +292,8 @@ fsp_binlog_page_fifo::flush_one_page(uint64_t file_no, bool force) size_t res= crc32_pwrite_page(fh, e->page_buf, page_no, MYF(MY_WME)); ut_a(res == ibb_page_size); mysql_mutex_lock(&m_mutex); - if (!is_complete && - (UNIV_UNLIKELY(e->latched) || UNIV_UNLIKELY(!e->flushed_clean))) + if (UNIV_UNLIKELY(e->latched) || + (!is_complete && UNIV_UNLIKELY(!e->flushed_clean))) { flushing= false; pthread_cond_broadcast(&m_cond); @@ -322,8 +322,20 @@ fsp_binlog_page_fifo::flush_one_page(uint64_t file_no, bool force) my_cond_wait(&m_cond, &m_mutex.m_mutex); } flushing= true; - is_complete= e->complete; - goto retry; + if (!is_complete) + { + /* + The page was not complete, a writer may have added data. Need to redo + the flush. + */ + is_complete= e->complete; + goto retry; + } + /* + The page was complete, but was latched while we were flushing (by a + reader). No need to flush again, just needed to wait until the latch + was released before we can continue to free the page. + */ } } /* @@ -1331,7 +1343,7 @@ binlog_chunk_reader::fetch_current_page() binlog_cur_end_offset[s.file_no&1].load(std::memory_order_acquire); if (s.file_no > active) { - ut_ad(s.page_no == 0); + ut_ad(s.page_no == 1); ut_ad(s.in_page_offset == 0); /* Allow a reader that reached the very end of the active binlog file to @@ -1712,7 +1724,7 @@ bool binlog_chunk_reader::data_available() uint64_t active= active_binlog_file_no.load(std::memory_order_acquire); if (active != s.file_no) { - ut_ad(active > s.file_no || (s.page_no == 0 && s.in_page_offset == 0)); + ut_ad(active > s.file_no || (s.page_no == 1 && s.in_page_offset == 0)); return active > s.file_no; } uint64_t end_offset=