diff --git a/mysql-test/suite/binlog_in_engine/binlog_flush_purge.result b/mysql-test/suite/binlog_in_engine/binlog_flush_purge.result index f0c42d6ee0f..c95d3612698 100644 --- a/mysql-test/suite/binlog_in_engine/binlog_flush_purge.result +++ b/mysql-test/suite/binlog_in_engine/binlog_flush_purge.result @@ -18,11 +18,11 @@ binlog-000001.ibb 262144 FLUSH BINARY LOGS; SHOW BINARY LOGS; Log_name File_size -binlog-000000.ibb 36864 +binlog-000000.ibb 40960 binlog-000001.ibb 262144 binlog-000002.ibb 262144 SET STATEMENT sql_log_bin=0 FOR -CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 0"); +CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 1"); FLUSH BINARY LOGS; FLUSH BINARY LOGS; SHOW BINLOG EVENTS IN 'binlog-000000.ibb' LIMIT 1; @@ -83,7 +83,7 @@ binlog-000022.ibb 262144 binlog-000023.ibb 262144 binlog-000024.ibb 262144 SET @now= NOW(); -*** Do 187 inserts ... +*** Do 149 inserts ... PURGE BINARY LOGS BEFORE @now; SHOW BINARY LOGS; Log_name File_size diff --git a/mysql-test/suite/binlog_in_engine/binlog_flush_purge.test b/mysql-test/suite/binlog_in_engine/binlog_flush_purge.test index 3384538dc59..16bacce0f66 100644 --- a/mysql-test/suite/binlog_in_engine/binlog_flush_purge.test +++ b/mysql-test/suite/binlog_in_engine/binlog_flush_purge.test @@ -27,22 +27,23 @@ FLUSH BINARY LOGS; SHOW BINARY LOGS; # Flush couple logs so we are sure the first file is on disk. -# Corrupt one bit in the first page of the first file to test that crc32 +# Corrupt one bit in the first data page of the first file to test that crc32 # mismatch is caught. 
SET STATEMENT sql_log_bin=0 FOR - CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 0"); + CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 1"); FLUSH BINARY LOGS; FLUSH BINARY LOGS; --let $file= binlog-000000.ibb --let $datadir= `SELECT @@datadir` --let BINLOG_FILE= $datadir/$file perl; +my $pos= 4096 + 50; # Early byte in page 1 (page 0 is file header). open F, '+<', $ENV{BINLOG_FILE} or die $!; -sysseek F, 50, 0 or die $!; +sysseek F, $pos, 0 or die $!; my $x; sysread F, $x, 1 or die $!; $x= chr(ord($x) ^ (1 <<3)); -sysseek F, 50, 0 or die $!; +sysseek F, $pos, 0 or die $!; syswrite F, $x, 1 or die $!; EOF @@ -146,7 +147,7 @@ SHOW BINARY LOGS; --sleep 1 SET @now= NOW(); --sleep 1 ---let $num_insert= `SELECT floor(256*1.5*1024/2100)` +--let $num_insert= `SELECT floor(256*1.2*1024/2100)` --echo *** Do $num_insert inserts ... --disable_query_log BEGIN; diff --git a/mysql-test/suite/binlog_in_engine/recovery.test b/mysql-test/suite/binlog_in_engine/recovery.test index b90303b9b17..bf81777e3b6 100644 --- a/mysql-test/suite/binlog_in_engine/recovery.test +++ b/mysql-test/suite/binlog_in_engine/recovery.test @@ -57,6 +57,6 @@ EOF --source include/wait_until_connected_again.inc --let $binlog_file= ---let $binlog_start= 0 +--let $binlog_start= 4 --source include/show_binlog_events.inc DROP TABLE t1; diff --git a/storage/innobase/fsp/fsp_binlog.cc b/storage/innobase/fsp/fsp_binlog.cc index 6a3bf3ad9d4..821754390ee 100644 --- a/storage/innobase/fsp/fsp_binlog.cc +++ b/storage/innobase/fsp/fsp_binlog.cc @@ -58,9 +58,10 @@ ulong ibb_page_size= (1 << ibb_page_size_shift); This value must be used over the setting innodb_binlog_state_interval, because after a restart the latest binlog file will be using the value of the setting prior to the restart; the new value of the setting (if different) - will be used for newly created binlog files. + will be used for newly created binlog files. 
The value refers to the file + of active_binlog_file_no. */ -uint32_t current_binlog_state_interval; +uint64_t current_binlog_state_interval; /* Mutex protecting active_binlog_file_no. @@ -583,7 +584,7 @@ fsp_binlog_page_fifo::flush_thread_run() if (all_flushed && file_no <= first_file_no) all_flushed= flush_one_page(file_no + 1, false); } - if (all_flushed) + if (all_flushed && !flush_thread_end) my_cond_wait(&m_cond, &m_mutex.m_mutex); } @@ -597,21 +598,30 @@ size_t crc32_pwrite_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept { const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM; - mach_write_to_4(buf + payload, my_crc32c(0, buf, payload)); + int4store(buf + payload, my_crc32c(0, buf, payload)); return my_pwrite(fd, (const uchar *)buf, ibb_page_size, (my_off_t)page_no << ibb_page_size_shift, MyFlags); } -size_t +/* + Read a page, with CRC check. + Returns: + + -1 error + 0 EOF + 1 Ok +*/ +int crc32_pread_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept { - size_t res= my_pread(fd, buf, ibb_page_size, - (my_off_t)page_no << ibb_page_size_shift, MyFlags); - if (UNIV_LIKELY(res == ibb_page_size)) + size_t read= my_pread(fd, buf, ibb_page_size, + (my_off_t)page_no << ibb_page_size_shift, MyFlags); + int res= 1; + if (UNIV_LIKELY(read == ibb_page_size)) { const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM; - uint32_t crc32= mach_read_from_4(buf + payload); + uint32_t crc32= uint4korr(buf + payload); /* Allow a completely zero (empty) page as well. 
*/ if (UNIV_UNLIKELY(crc32 != my_crc32c(0, buf, payload)) && (buf[0] != 0 || 0 != memcmp(buf, buf+1, ibb_page_size - 1))) @@ -624,10 +634,46 @@ crc32_pread_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept page_no, crc32); } } + else if (read == (size_t)-1) + res= -1; + else + res= 0; + return res; } +int +crc32_pread_page(pfs_os_file_t fh, byte *buf, uint32_t page_no, myf MyFlags) + noexcept +{ + const uint32_t page_size= (uint32_t)ibb_page_size; + ulint bytes_read= 0; + dberr_t err= os_file_read(IORequestRead, fh, buf, + (os_offset_t)page_no << ibb_page_size_shift, + page_size, &bytes_read); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) + return -1; + else if (UNIV_UNLIKELY(bytes_read < page_size)) + return 0; + + const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM; + uint32_t crc32= uint4korr(buf + payload); + /* Allow a completely zero (empty) page as well. */ + if (UNIV_UNLIKELY(crc32 != my_crc32c(0, buf, payload)) && + (buf[0] != 0 || 0 != memcmp(buf, buf+1, ibb_page_size - 1))) + { + my_errno= EIO; + if (MyFlags & MY_WME) + sql_print_error("InnoDB: Page corruption in binlog tablespace file " + "page number %u (invalid crc32 checksum 0x%08X)", + page_no, crc32); + return -1; + } + return 1; +} + + void binlog_write_up_to_now() noexcept { @@ -654,6 +700,36 @@ binlog_write_up_to_now() noexcept } +void +fsp_binlog_extract_header_page(const byte *page_buf, + binlog_header_data *out_header_data) noexcept +{ + uint32_t magic= uint4korr(page_buf); + uint32_t vers_major= uint4korr(page_buf + 8); + const uint32_t payload= IBB_HEADER_PAGE_SIZE - BINLOG_PAGE_CHECKSUM; + uint32_t crc32= uint4korr(page_buf + payload); + out_header_data->is_empty= false; + out_header_data->is_invalid= false; + if (crc32 != my_crc32c(0, page_buf, payload) || + magic != IBB_MAGIC || vers_major > IBB_FILE_VERS_MAJOR) + { + if (page_buf[0] == 0 && + 0 == memcmp(page_buf, page_buf+1, IBB_HEADER_PAGE_SIZE - 1)) + out_header_data->is_empty= true; + else + 
out_header_data->is_invalid= true; + return; + } + out_header_data->page_size_shift= uint4korr(page_buf + 4); + out_header_data->vers_major= vers_major; + out_header_data->vers_minor= uint4korr(page_buf + 12); + out_header_data->file_no= uint8korr(page_buf + 16); + out_header_data-> page_count= uint8korr(page_buf + 24); + out_header_data-> start_lsn= uint8korr(page_buf + 32); + out_header_data-> diff_state_interval= uint8korr(page_buf + 40); +} + + void fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page, uint32_t page_offset, uint32_t len) @@ -678,6 +754,19 @@ fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page, page_offset + &page->page_buf[0], len); } + +void +fsp_log_header_page(mtr_t *mtr, fsp_binlog_page_entry *page, uint32_t len) + noexcept +{ + uint64_t file_no= page->file_no; + uint32_t page_no= page->page_no; + ut_ad(page_no == 0); + page->complete= true; + mtr->write_binlog((file_no & 1), page_no, 0, &page->page_buf[0], len); +} + + /* Initialize the InnoDB implementation of binlog. Note that we do not create or open any binlog tablespaces here. @@ -849,6 +938,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type) byte cont_flag= 0; for (;;) { if (page_offset == BINLOG_PAGE_DATA) { + ut_ad(!block); uint32_t file_size_in_pages= binlog_page_fifo->size_in_pages(file_no); if (UNIV_UNLIKELY(page_no >= file_size_in_pages)) { /* @@ -859,7 +949,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type) The normal case is that the next tablespace is already pre-allocated and available; binlog tablespace N is active while (N+1) is being - pre-allocated. Only under extreme I/O pressure should be need to + pre-allocated. Only under extreme I/O pressure should we need to stall here. */ ut_ad(!pending_prev_end_offset); @@ -873,14 +963,24 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type) // ToDo: assert that a single write doesn't span more than two binlog files. 
++file_no; + file_size_in_pages= binlog_page_fifo->size_in_pages(file_no); binlog_cur_written_offset[file_no & 1].store(0, std::memory_order_relaxed); binlog_cur_end_offset[file_no & 1].store(0, std::memory_order_relaxed); pthread_cond_signal(&active_binlog_cond); mysql_mutex_unlock(&active_binlog_mutex); binlog_cur_page_no= page_no= 0; - /* ToDo: Here we must use the value from the file, if this file was pre-allocated before a server restart where the value of innodb_binlog_state_interval changed. Maybe just make innodb_binlog_state_interval dynamic and make the prealloc thread (and discover code at startup) supply the correct value to use for each file. */ current_binlog_state_interval= - (uint32_t)(innodb_binlog_state_interval >> page_size_shift); + (uint64_t)(innodb_binlog_state_interval >> page_size_shift); + } + + /* Write the header page at the start of a binlog tablespace file. */ + if (page_no == 0) + { + lsn_t start_lsn= log_sys.get_lsn(std::memory_order_acquire); + bool err= ibb_write_header_page(mtr, file_no, file_size_in_pages, + start_lsn, current_binlog_state_interval); + ut_a(!err /* ToDo error handling */); + page_no= 1; } /* Must be a power of two. 
*/ @@ -888,14 +988,15 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type) current_binlog_state_interval == (uint64_t)1 << (63 - nlz(current_binlog_state_interval))); - if (0 == (page_no & (current_binlog_state_interval - 1))) { - if (page_no == 0) { + if (page_no == 1 || + 0 == (page_no & (current_binlog_state_interval - 1))) { + if (page_no == 1) { rpl_binlog_state_base full_state; bool err; full_state.init(); err= load_global_binlog_state(&full_state); ut_a(!err /* ToDo error handling */); - if (UNIV_UNLIKELY(file_no == 0 && page_no == 0) && + if (UNIV_UNLIKELY(file_no == 0 && page_no == 1) && (full_state.count_nolock() == 1)) { /* @@ -932,14 +1033,14 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type) } } err= binlog_gtid_state(&full_state, mtr, block, page_no, - page_offset, file_no, file_size_in_pages); + page_offset, file_no); ut_a(!err /* ToDo error handling */); ut_ad(block); full_state.free(); binlog_diff_state.reset_nolock(); } else { bool err= binlog_gtid_state(&binlog_diff_state, mtr, block, page_no, - page_offset, file_no, file_size_in_pages); + page_offset, file_no); ut_a(!err /* ToDo error handling */); } } else @@ -1256,15 +1357,15 @@ binlog_chunk_reader::fetch_current_page() cur_file_length= ~(uint64_t)0; } ++s.file_no; - s.page_no= 0; + s.page_no= 1; /* Skip the header page. */ continue; } - size_t res= crc32_pread_page(cur_file_handle, page_buffer, s.page_no, - MYF(MY_WME)); - if (res == (size_t)-1) + int res= crc32_pread_page(cur_file_handle, page_buffer, s.page_no, + MYF(MY_WME)); + if (res < 0) return CHUNK_READER_ERROR; - if (res == 0 && my_errno == HA_ERR_FILE_TOO_SHORT) + if (res == 0) goto goto_next_file; page_ptr= page_buffer; return CHUNK_READER_FOUND; @@ -1459,7 +1560,7 @@ go_next_page: cur_file_handle= (File)-1; cur_file_length= ~(uint64_t)0; ++s.file_no; - s.page_no= 0; + s.page_no= 1; /* Skip the header page. 
*/ } } @@ -1470,6 +1571,19 @@ go_next_page: } +int +binlog_chunk_reader::get_file_header(binlog_header_data *out_header) +{ + seek(current_file_no(), 0); + if (fetch_current_page() != CHUNK_READER_FOUND) + return -1; + fsp_binlog_extract_header_page(page_ptr, out_header); + if (out_header->is_invalid || out_header->is_empty) + return -1; + return 0; +} + + void binlog_chunk_reader::restore_pos(binlog_chunk_reader::saved_position *pos) { diff --git a/storage/innobase/handler/innodb_binlog.cc b/storage/innobase/handler/innodb_binlog.cc index ca7dd3aab3e..1c3946a273d 100644 --- a/storage/innobase/handler/innodb_binlog.cc +++ b/storage/innobase/handler/innodb_binlog.cc @@ -389,8 +389,7 @@ public: ~gtid_search(); enum Read_Result read_gtid_state_file_no(rpl_binlog_state_base *state, uint64_t file_no, uint32_t page_no, - uint64_t *out_file_end, - uint32_t *out_diff_state_interval); + uint64_t *out_file_end); int find_gtid_pos(slave_connection_state *pos, rpl_binlog_state_base *out_state, uint64_t *out_file_no, uint64_t *out_offset); @@ -491,7 +490,7 @@ public: bool close_file() noexcept; bool next_file() noexcept; bool next_page() noexcept; - void update_page_from_record(uint16_t offset, + bool update_page_from_record(uint16_t offset, const byte *buf, size_t size) noexcept; }; @@ -506,8 +505,8 @@ static int innodb_binlog_discover(); static bool binlog_state_recover(); static void innodb_binlog_autopurge(uint64_t first_open_file_no); static int read_gtid_state_from_page(rpl_binlog_state_base *state, - const byte *page, uint32_t page_no, - binlog_header_data *out_header_data); + const byte *page, uint32_t page_no) + noexcept; /* @@ -526,7 +525,6 @@ binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty) noexcept { char full_path[OS_FILE_MAX_PATH]; - rpl_binlog_state_base dummy_state; binlog_header_data header; out_empty= true; @@ -547,14 +545,13 @@ binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty) it as an empty file. 
*/ const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM; - uint32_t crc32= mach_read_from_4(page_buf + payload); + uint32_t crc32= uint4korr(page_buf + payload); if (UNIV_UNLIKELY(crc32 != my_crc32c(0, page_buf, payload))) return 0; - dummy_state.init(); - int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header); - if (res <= 0) - return res; + fsp_binlog_extract_header_page(page_buf, &header); + if (header.is_invalid) + return 0; if (!header.is_empty) { out_empty= false; @@ -724,8 +721,8 @@ binlog_recovery::init_recovery_from(uint64_t file_no, lsn_t file_lsn, skipping_early_lsn= false; if (offset <= BINLOG_PAGE_DATA) { - update_page_from_record(offset, buf, size); skipping_partial_page= false; + return update_page_from_record(offset, buf, size); } } return false; @@ -852,22 +849,24 @@ binlog_recovery::zero_out_cur_file() return; /* Recover the original size from the current file. */ - size_t read= crc32_pread_page(cur_file_fh, page_buf, 0, MYF(0)); - if (read != (size_t)ibb_page_size) + int res= crc32_pread_page(cur_file_fh, page_buf, 0, MYF(0)); + if (res <= 0) { sql_print_warning("InnoDB: Could not read last binlog file during recovery"); return; } binlog_header_data header; - rpl_binlog_state_base dummy_state; - dummy_state.init(); - int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header); - if (res <= 0) + fsp_binlog_extract_header_page(page_buf, &header); + + if (header.is_invalid) { - if (res < 0) - sql_print_warning("InnoDB: Could not read last binlog file during recovery"); - else - sql_print_warning("InnoDB: Empty binlog file header found during recovery"); + sql_print_warning("InnoDB: Invalid header page in last binlog file " + "during recovery"); + return; + } + if (header.is_empty) + { + sql_print_warning("InnoDB: Empty binlog file header found during recovery"); ut_ad(0); return; } @@ -1040,17 +1039,56 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset, if (offset + size >= 
ibb_page_size) return !srv_force_recovery; - update_page_from_record(offset, buf, size); - return false; + return update_page_from_record(offset, buf, size); } -void +bool binlog_recovery::update_page_from_record(uint16_t offset, const byte *buf, size_t size) noexcept { memcpy(page_buf + offset, buf, size); + if (cur_page_no == 0 && offset == 0) + { + binlog_header_data header; + /* + This recovery record is for the file header page. + This record is special, it covers only the used part of the header page. + The remainder of the page must be set to zeroes. + Additionally, there is an extra CRC corresponding to a minimum + page size of IBB_PAGE_SIZE_MIN, in anticipation for future configurable + page size. + */ + memset(page_buf + size, 0, ibb_page_size - (size + BINLOG_PAGE_DATA_END)); + cur_page_offset= (uint32_t)ibb_page_size - BINLOG_PAGE_DATA_END; + uint32_t payload= IBB_HEADER_PAGE_SIZE - BINLOG_PAGE_CHECKSUM; + int4store(page_buf + payload, my_crc32c(0, page_buf, payload)); + fsp_binlog_extract_header_page(page_buf, &header); + if (header.is_invalid) + { + sql_print_error("InnoDB: Corrupt or invalid file header found during " + "recovery of file number %" PRIu64, cur_file_no); + return !srv_force_recovery; + } + if (header.is_empty) + { + sql_print_error("InnoDB: Empty file header found during " + "recovery of file number %" PRIu64, cur_file_no); + return !srv_force_recovery; + } + if (header.file_no != cur_file_no) + { + sql_print_error("InnoDB: Inconsistency in file header during recovery. 
" + "The header in file number %" PRIu64 " is for file " + "number %" PRIu64, cur_file_no, header.file_no); + return !srv_force_recovery; + } + + return false; + } + cur_page_offset= offset + (uint32_t)size; + return false; } @@ -1107,7 +1145,7 @@ innodb_binlog_init_state() binlog_cur_page_no= 0; binlog_cur_page_offset= BINLOG_PAGE_DATA; current_binlog_state_interval= - (uint32_t)(innodb_binlog_state_interval >> ibb_page_size_shift); + (uint64_t)(innodb_binlog_state_interval >> ibb_page_size_shift); ut_a(innodb_binlog_state_interval == (current_binlog_state_interval << ibb_page_size_shift)); } @@ -1149,6 +1187,7 @@ binlog_sync_initial() mtr.commit(); log_buffer_flush_to_disk(true); binlog_page_fifo->flush_up_to(0, 0); + binlog_page_fifo->do_fdatasync(0); } @@ -1300,41 +1339,57 @@ binlog_page_empty(const byte *page) static int find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf, - uint32_t *out_page_no, uint32_t *out_pos_in_page) + uint32_t *out_page_no, uint32_t *out_pos_in_page, + uint64_t *out_state_interval) { const uint32_t page_size= (uint32_t)ibb_page_size; const uint32_t page_size_shift= (uint32_t)ibb_page_size_shift; const uint32_t idx= file_no & 1; char file_name[OS_FILE_MAX_PATH]; uint32_t p_0, p_1, p_2, last_nonempty; - dberr_t err; byte *p, *page_end; bool ret; + binlog_header_data header_data; *out_page_no= 0; *out_pos_in_page= BINLOG_PAGE_DATA; + *out_state_interval= 0; binlog_name_make(file_name, file_no); pfs_os_file_t fh= os_file_create(innodb_data_file_key, file_name, OS_FILE_OPEN, OS_DATA_FILE, srv_read_only_mode, &ret); if (!ret) { - sql_print_warning("Unable to open file '%s'", file_name); + sql_print_warning("InnoDB: Unable to open file '%s'", file_name); return -1; } - err= os_file_read(IORequestRead, fh, page_buf, 0, page_size, nullptr); - if (err != DB_SUCCESS) { + int res= crc32_pread_page(fh, page_buf, 0, MYF(MY_WME)); + if (res <= 0) { os_file_close(fh); return -1; } - if (binlog_page_empty(page_buf)) { + 
fsp_binlog_extract_header_page(page_buf, &header_data); + if (header_data.is_invalid) + { + sql_print_error("InnoDB: Invalid or corrupt file header in file " + "'%s'", file_name); + return -1; + } + if (header_data.is_empty) { ret= fsp_binlog_open(file_name, fh, file_no, file_size, ~(uint32_t)0, nullptr); binlog_cur_written_offset[idx].store(0, std::memory_order_relaxed); binlog_cur_end_offset[idx].store(0, std::memory_order_relaxed); return (ret ? -1 : 0); } + if (header_data.file_no != file_no) + { + sql_print_error("InnoDB: Inconsistent file header in file '%s', " + "wrong file_no %" PRIu64, file_name, header_data.file_no); + return -1; + } + *out_state_interval= header_data.diff_state_interval; last_nonempty= 0; /* @@ -1348,9 +1403,8 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf, break; ut_ad(p_0 < p_2); p_1= (p_0 + p_2) / 2; - err= os_file_read(IORequestRead, fh, page_buf, p_1 << page_size_shift, - page_size, nullptr); - if (err != DB_SUCCESS) { + res= crc32_pread_page(fh, page_buf, p_1, MYF(MY_WME)); + if (res <= 0) { os_file_close(fh); return -1; } @@ -1368,9 +1422,8 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf, This sometimes does an extra read, but as this is only during startup it does not matter. 
*/ - err= os_file_read(IORequestRead, fh, page_buf, - last_nonempty << page_size_shift, page_size, nullptr); - if (err != DB_SUCCESS) { + res= crc32_pread_page(fh, page_buf, last_nonempty, MYF(MY_WME)); + if (res <= 0) { os_file_close(fh); return -1; } @@ -1416,6 +1469,7 @@ innodb_binlog_discover() const uint32_t page_size= (uint32_t)ibb_page_size; const uint32_t page_size_shift= (uint32_t)ibb_page_size_shift; struct found_binlogs UNINIT_VAR(binlog_files); + uint64_t diff_state_interval; int res= scan_for_binlogs(innodb_binlog_directory, &binlog_files, false); if (res <= 0) @@ -1437,10 +1491,12 @@ innodb_binlog_discover() res= find_pos_in_binlog(binlog_files.last_file_no, binlog_files.last_size, - page_buf.get(), &page_no, &pos_in_page); + page_buf.get(), &page_no, &pos_in_page, + &diff_state_interval); if (res < 0) { file_no= binlog_files.last_file_no; active_binlog_file_no.store(file_no, std::memory_order_release); + current_binlog_state_interval= innodb_binlog_state_interval; sql_print_warning("Binlog number %llu could no be opened. Starting a new " "binlog file from number %llu", binlog_files.last_file_no, (file_no + 1)); @@ -1451,6 +1507,7 @@ innodb_binlog_discover() /* Found start position in the last binlog file. 
*/ file_no= binlog_files.last_file_no; active_binlog_file_no.store(file_no, std::memory_order_release); + current_binlog_state_interval= diff_state_interval; binlog_cur_page_no= page_no; binlog_cur_page_offset= pos_in_page; ib::info() << "Continuing binlog number " << file_no << " from position " @@ -1465,10 +1522,12 @@ innodb_binlog_discover() res= find_pos_in_binlog(binlog_files.prev_file_no, binlog_files.prev_size, page_buf.get(), - &prev_page_no, &prev_pos_in_page); + &prev_page_no, &prev_pos_in_page, + &diff_state_interval); if (res < 0) { file_no= binlog_files.last_file_no; active_binlog_file_no.store(file_no, std::memory_order_release); + current_binlog_state_interval= innodb_binlog_state_interval; binlog_cur_page_no= page_no; binlog_cur_page_offset= pos_in_page; sql_print_warning("Binlog number %llu could not be opened, starting " @@ -1478,6 +1537,7 @@ innodb_binlog_discover() } file_no= binlog_files.prev_file_no; active_binlog_file_no.store(file_no, std::memory_order_release); + current_binlog_state_interval= diff_state_interval; binlog_cur_page_no= prev_page_no; binlog_cur_page_offset= prev_pos_in_page; ib::info() << "Continuing binlog number " << file_no << " from position " @@ -1490,6 +1550,7 @@ innodb_binlog_discover() /* Just one empty binlog file found. 
*/ file_no= binlog_files.last_file_no; active_binlog_file_no.store(file_no, std::memory_order_release); + current_binlog_state_interval= innodb_binlog_state_interval; binlog_cur_page_no= page_no; binlog_cur_page_offset= pos_in_page; ib::info() << "Continuing binlog number " << file_no << " from position " @@ -1501,6 +1562,7 @@ innodb_binlog_discover() file_no= 0; earliest_binlog_file_no= 0; total_binlog_used_size= 0; + current_binlog_state_interval= innodb_binlog_state_interval; ib::info() << "Starting a new binlog from file number " << file_no << "."; return 0; } @@ -1532,8 +1594,8 @@ void innodb_binlog_close(bool shutdown) if (shutdown && innodb_binlog_inited >= 1) { binlog_diff_state.free(); - mysql_mutex_destroy(&purge_binlog_mutex); fsp_binlog_shutdown(); + mysql_mutex_destroy(&purge_binlog_mutex); } } @@ -1624,38 +1686,56 @@ innodb_binlog_prealloc_thread() } +bool +ibb_write_header_page(mtr_t *mtr, uint64_t file_no, uint64_t file_size_in_pages, + lsn_t start_lsn, uint64_t gtid_state_interval_in_pages) +{ + fsp_binlog_page_entry *block; + uint32_t used_bytes; + + block= binlog_page_fifo->create_page(file_no, 0); + ut_a(block /* ToDo: error handling? */); + byte *ptr= &block->page_buf[0]; + + int4store(ptr, IBB_MAGIC); + int4store(ptr + 4, ibb_page_size_shift); + int4store(ptr + 8, IBB_FILE_VERS_MAJOR); + int4store(ptr + 12, IBB_FILE_VERS_MINOR); + int8store(ptr + 16, file_no); + int8store(ptr + 24, file_size_in_pages); + int8store(ptr + 32, start_lsn); + int8store(ptr + 40, gtid_state_interval_in_pages); + used_bytes= 48; + ut_ad(ibb_page_size >= IBB_HEADER_PAGE_SIZE); + memset(ptr + used_bytes, 0, ibb_page_size - (used_bytes + BINLOG_PAGE_CHECKSUM)); + /* + For future expansion with configurable page size: + Write a CRC32 at the end of the minimal page size. This way, the header + page can be read and checksummed without knowing the page size used in + the file, and then the actual page size can be obtained from the header + page. 
+ */ + const uint32_t payload= IBB_HEADER_PAGE_SIZE - BINLOG_PAGE_CHECKSUM; + int4store(ptr + payload, my_crc32c(0, ptr, payload)); + + fsp_log_header_page(mtr, block, used_bytes); + binlog_page_fifo->release_page_mtr(block, mtr); + + return false; // No error +} + + __attribute__((noinline)) static ssize_t -serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size, - uint32_t file_size_in_pages, uint64_t file_no, - bool is_first_page) +serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size) + noexcept { unsigned char *p= (unsigned char *)buf; /* - 1 uint64_t for the current LSN at start of binlog file. - 1 uint64_t for the file_no. - 1 uint32_t for the file size in pages. - 1 uint32_t for the innodb_binlog_state_interval in pages. 1 uint64_t for the number of entries in the state stored. 2 uint32_t + 1 uint64_t for at least one GTID. */ - ut_ad(buf_size >= 4*COMPR_INT_MAX32 + 4*COMPR_INT_MAX64); - if (is_first_page) { - /* - In the first page where we put the full state, include the value of the - setting for the interval at which differential states are binlogged, so - we know how to search them independent of how the setting changes. - - We also include the current LSN for recovery purposes; and the file - length and file_no, which is also useful if we have to recover the whole - file from the redo log after a crash. - */ - p= compr_int_write(p, log_sys.get_lsn(std::memory_order_acquire)); - p= compr_int_write(p, file_no); - p= compr_int_write(p, file_size_in_pages); - /* ToDo: Check that this current_binlog_state_interval is the correct value! 
*/ - p= compr_int_write(p, current_binlog_state_interval); - } + ut_ad(buf_size >= 2*COMPR_INT_MAX32 + 2*COMPR_INT_MAX64); p= compr_int_write(p, state->count_nolock()); unsigned char * const pmax= p + (buf_size - (2*COMPR_INT_MAX32 + COMPR_INT_MAX64)); @@ -1678,8 +1758,7 @@ serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size, bool binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr, fsp_binlog_page_entry * &block, uint32_t &page_no, - uint32_t &page_offset, uint64_t file_no, - uint32_t file_size_in_pages) + uint32_t &page_offset, uint64_t file_no) { /* Use a small, efficient stack-allocated buffer by default, falling back to @@ -1690,9 +1769,7 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr, uint32_t block_page_no= ~(uint32_t)0; block= nullptr; - ssize_t used_bytes= serialize_gtid_state(state, small_buf, sizeof(small_buf), - file_size_in_pages, file_no, - page_no==0); + ssize_t used_bytes= serialize_gtid_state(state, small_buf, sizeof(small_buf)); if (used_bytes >= 0) { buf= small_buf; @@ -1706,8 +1783,7 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr, if (UNIV_UNLIKELY(!alloced_buf)) return true; buf= alloced_buf; - used_bytes= serialize_gtid_state(state, buf, buf_size, file_size_in_pages, - file_no, page_no==0); + used_bytes= serialize_gtid_state(state, buf, buf_size); if (UNIV_UNLIKELY(used_bytes < 0)) { ut_ad(0 /* Shouldn't happen, as we allocated maximum needed size. */); @@ -1722,6 +1798,8 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr, /* For now, GTID state always at the start of a page. */ ut_ad(page_offset == BINLOG_PAGE_DATA); + /* Page 0 is reserved for the header page. 
*/ + ut_ad(page_no != 0); /* Only write the GTID state record if there is room for actual event data @@ -1792,16 +1870,12 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr, */ static int read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page, - uint32_t page_no, binlog_header_data *out_header_data) + uint32_t page_no) noexcept { const byte *p= page + BINLOG_PAGE_DATA; byte t= *p; if (UNIV_UNLIKELY((t & FSP_BINLOG_TYPE_MASK) != FSP_BINLOG_TYPE_GTID_STATE)) - { - out_header_data->is_empty= binlog_page_empty(page); return 0; - } - out_header_data->is_empty= false; /* ToDo: Handle reading a state that spans multiple pages. For now, we assume the state fits in a single page. */ ut_a(t & FSP_BINLOG_FLAG_LAST); @@ -1811,42 +1885,6 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page, return -1; std::pair v_and_p= compr_int_read(p + 3); p= v_and_p.second; - if (page_no == 0) - { - /* - The state in the first page has four extra words: The start LSN of the - file; the file_no of the file; the file length, in pages; and the offset - between differential binlog states logged regularly in the binlog - tablespace. 
- */ - if (UNIV_UNLIKELY(p >= p_end)) - return -1; - out_header_data->start_lsn= (uint32_t)v_and_p.first; - v_and_p= compr_int_read(p); - p= v_and_p.second; - if (UNIV_UNLIKELY(p >= p_end)) - return -1; - out_header_data->file_no= v_and_p.first; - v_and_p= compr_int_read(p); - p= v_and_p.second; - if (UNIV_UNLIKELY(p >= p_end) || UNIV_UNLIKELY(v_and_p.first >= UINT32_MAX)) - return -1; - out_header_data->page_count= (uint32_t)v_and_p.first; - v_and_p= compr_int_read(p); - p= v_and_p.second; - if (UNIV_UNLIKELY(p >= p_end) || UNIV_UNLIKELY(v_and_p.first >= UINT32_MAX)) - return -1; - out_header_data->diff_state_interval= (uint32_t)v_and_p.first; - v_and_p= compr_int_read(p); - p= v_and_p.second; - } - else - { - out_header_data->start_lsn= 0; - out_header_data->file_no= ~(uint64_t)0; - out_header_data->page_count= 0; - out_header_data->diff_state_interval= 0; - } if (UNIV_UNLIKELY(p > p_end)) return -1; @@ -1899,8 +1937,7 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page, -1 Error */ static int -read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no, - binlog_header_data *out_header_data) +read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no) { std::unique_ptr page_buf ((byte *)my_malloc(PSI_NOT_INSTRUMENTED, ibb_page_size, MYF(MY_WME)), @@ -1909,12 +1946,11 @@ read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no, return -1; /* ToDo: Handle encryption. 
*/ - size_t res= crc32_pread_page(file, page_buf.get(), page_no, MYF(MY_WME)); - if (UNIV_UNLIKELY(res == (size_t)-1)) + int res= crc32_pread_page(file, page_buf.get(), page_no, MYF(MY_WME)); + if (UNIV_UNLIKELY(res <= 0)) return -1; - return read_gtid_state_from_page(state, page_buf.get(), page_no, - out_header_data); + return read_gtid_state_from_page(state, page_buf.get(), page_no); } @@ -1929,40 +1965,36 @@ read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no, static bool binlog_state_recover() { - binlog_header_data header_data; rpl_binlog_state_base state; state.init(); - uint32_t diff_state_interval= 0; - uint32_t page_no= 0; + uint64_t active= active_binlog_file_no.load(std::memory_order_relaxed); + uint64_t diff_state_interval= current_binlog_state_interval; + uint32_t page_no= 1; char filename[OS_FILE_MAX_PATH]; - binlog_name_make(filename, - active_binlog_file_no.load(std::memory_order_relaxed)); + binlog_name_make(filename, active); File file= my_open(filename, O_RDONLY | O_BINARY, MYF(MY_WME)); if (UNIV_UNLIKELY(file < (File)0)) return true; - int res= read_gtid_state(&state, file, page_no, &header_data); + int res= read_gtid_state(&state, file, page_no); if (res < 0) { my_close(file, MYF(0)); return true; } - diff_state_interval= header_data.diff_state_interval; if (diff_state_interval == 0) { sql_print_warning("Invalid differential binlog state interval %llu found " "in binlog file, ignoring", diff_state_interval); - current_binlog_state_interval= 0; /* Disable in this binlog file */ } else { - current_binlog_state_interval= diff_state_interval; page_no= (uint32_t)(binlog_cur_page_no - (binlog_cur_page_no % diff_state_interval)); - while (page_no > 0) + while (page_no > 1) { - res= read_gtid_state(&state, file, page_no, &header_data); + res= read_gtid_state(&state, file, page_no); if (res > 0) break; page_no-= (uint32_t)diff_state_interval; @@ -1970,9 +2002,7 @@ binlog_state_recover() } my_close(file, MYF(0)); - 
ha_innodb_binlog_reader reader(active_binlog_file_no.load - (std::memory_order_relaxed), - page_no << ibb_page_size_shift); + ha_innodb_binlog_reader reader(active, page_no << ibb_page_size_shift); return binlog_recover_gtid_state(&state, &reader); } @@ -2410,6 +2440,8 @@ ha_innodb_binlog_reader::ha_innodb_binlog_reader(uint64_t file_no, { page_buf= (uchar *)ut_malloc(ibb_page_size, mem_key_binlog); chunk_rd.set_page_buf(page_buf); + if (offset < ibb_page_size) + offset= ibb_page_size; chunk_rd.seek(file_no, offset); chunk_rd.skip_partial(true); } @@ -2617,10 +2649,8 @@ gtid_search::~gtid_search() enum gtid_search::Read_Result gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state, uint64_t file_no, uint32_t page_no, - uint64_t *out_file_end, - uint32_t *out_diff_state_interval) + uint64_t *out_file_end) { - binlog_header_data header_data; *out_file_end= 0; uint64_t active2= active_binlog_file_no.load(std::memory_order_acquire); if (file_no > active2) @@ -2675,9 +2705,7 @@ gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state, if (block) { ut_ad(end_offset != ~(uint64_t)0); - int res= read_gtid_state_from_page(state, block->page_buf, page_no, - &header_data); - *out_diff_state_interval= header_data.diff_state_interval; + int res= read_gtid_state_from_page(state, block->page_buf, page_no); binlog_page_fifo->release_page(block); return (Read_Result)res; } @@ -2716,8 +2744,7 @@ gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state, } if (!*out_file_end) *out_file_end= cur_open_file_length; - int res= read_gtid_state(state, cur_open_file, page_no, &header_data); - *out_diff_state_interval= header_data.diff_state_interval; + int res= read_gtid_state(state, cur_open_file, page_no); return (Read_Result)res; } } @@ -2735,7 +2762,6 @@ gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state, 0 Position not found (has been purged) 1 Position found */ - int gtid_search::find_gtid_pos(slave_connection_state *pos, 
rpl_binlog_state_base *out_state, @@ -2750,14 +2776,39 @@ gtid_search::find_gtid_pos(slave_connection_state *pos, /* First search backwards for the right file to start from. */ uint64_t file_end= 0; - uint32_t diff_state_page_interval= 0; + uint64_t diff_state_page_interval= 0; rpl_binlog_state_base base_state, page0_diff_state, tmp_diff_state; base_state.init(); for (;;) { + /* + Read the header page, needed to get the binlog diff state interval. + ToDo: Here we instantiate our own binlog_chunk_reader specifically for + this. Later, when read_gtid_state_file_no() is fixed to also use a + binlog_chunk_reader, integrate and use the same single + binlog_chunk_reader object. + */ + binlog_header_data header; + int err; + byte *page_buffer= (byte *)ut_malloc(ibb_page_size, mem_key_binlog); + if (!page_buffer) + { + my_error(ER_OUTOFMEMORY, MYF(0), ibb_page_size); + return -1; + } + { + binlog_chunk_reader chunk_reader; + chunk_reader.set_page_buf(page_buffer); + chunk_reader.seek(file_no, 0); + err= chunk_reader.get_file_header(&header); + diff_state_page_interval= header.diff_state_interval; + } + ut_free(page_buffer); + if (err) + return -1; + enum Read_Result res= - read_gtid_state_file_no(&base_state, file_no, 0, &file_end, - &diff_state_page_interval); + read_gtid_state_file_no(&base_state, file_no, 1, &file_end); if (res == READ_ENOENT) return 0; if (res == READ_ERROR) @@ -2769,7 +2820,7 @@ gtid_search::find_gtid_pos(slave_connection_state *pos, /* Handle the special case of a completely empty binlog file. */ out_state->reset_nolock(); *out_file_no= file_no; - *out_offset= 0; + *out_offset= ibb_page_size; return 1; } ut_ad(0 /* Not expected to find no state, should always be written. */); @@ -2794,20 +2845,18 @@ gtid_search::find_gtid_pos(slave_connection_state *pos, uint32_t page2= (uint32_t) (diff_state_page_interval + ((file_end - 1) >> ibb_page_size_shift)); /* Round to the next diff_state_page_interval after file_end. 
*/ - page2-= page2 % diff_state_page_interval; + page2-= page2 % (uint32_t)diff_state_page_interval; uint32_t page1= (page0 + page2) / 2; page0_diff_state.init(); page0_diff_state.load_nolock(&base_state); tmp_diff_state.init(); - while (page1 >= page0 + diff_state_page_interval) + while (page1 >= page0 + diff_state_page_interval && page1 > 1) { ut_ad((page1 - page0) % diff_state_page_interval == 0); tmp_diff_state.reset_nolock(); tmp_diff_state.load_nolock(&base_state); - uint32_t dummy; enum Read_Result res= - read_gtid_state_file_no(&tmp_diff_state, file_no, page1, &file_end, - &dummy); + read_gtid_state_file_no(&tmp_diff_state, file_no, page1, &file_end); if (res == READ_ENOENT) return 0; /* File purged while we are reading from it? */ if (res == READ_ERROR) @@ -2819,7 +2868,7 @@ gtid_search::find_gtid_pos(slave_connection_state *pos, try the one just before. It will be safe, even if not always optimal, and this is an abnormal situation anyway. */ - page1= page1 - diff_state_page_interval; + page1= page1 - (uint32_t)diff_state_page_interval; continue; } if (tmp_diff_state.is_before_pos(pos)) @@ -2835,6 +2884,8 @@ gtid_search::find_gtid_pos(slave_connection_state *pos, ut_ad(page1 >= page0); out_state->load_nolock(&page0_diff_state); *out_file_no= file_no; + if (page0 == 0) + page0= 1; /* Skip the initial file header page. */ *out_offset= (uint64_t)page0 << ibb_page_size_shift; return 1; } @@ -2888,6 +2939,8 @@ ha_innodb_binlog_reader::init_legacy_pos(const char *filename, ulonglong offset) reached. This way we avoid reading garbaga data for invalid request offset. 
*/ + if (offset < ibb_page_size) + offset= ibb_page_size; chunk_rd.seek(file_no, (uint64_t)offset); chunk_rd.skip_partial(true); cur_file_no= chunk_rd.current_file_no(); @@ -2980,14 +3033,12 @@ innodb_binlog_get_init_state(rpl_binlog_state_base *out_state) { gtid_search search_obj; uint64_t dummy_file_end; - uint32_t dummy_diff_state_interval; bool err= false; mysql_mutex_lock(&purge_binlog_mutex); uint64_t file_no= earliest_binlog_file_no; enum gtid_search::Read_Result res= - search_obj.read_gtid_state_file_no(out_state, file_no, 0, &dummy_file_end, - &dummy_diff_state_interval); + search_obj.read_gtid_state_file_no(out_state, file_no, 1, &dummy_file_end); mysql_mutex_unlock(&purge_binlog_mutex); if (res != gtid_search::READ_FOUND) err= true; diff --git a/storage/innobase/include/fsp_binlog.h b/storage/innobase/include/fsp_binlog.h index e373b7548d5..8b91bf1cdd1 100644 --- a/storage/innobase/include/fsp_binlog.h +++ b/storage/innobase/include/fsp_binlog.h @@ -32,7 +32,27 @@ InnoDB implementation of binlog. struct chunk_data_base; +struct binlog_header_data; +/* 4-byte "magic" identifying InnoDB binlog file (little endian). */ +static constexpr uint32_t IBB_MAGIC= 0x010dfefe; +static constexpr uint32_t IBB_FILE_VERS_MAJOR= 0; +static constexpr uint32_t IBB_FILE_VERS_MINOR= 0; + +/* + The size of the header page that is stored in the first page of a file. + This is the smallest page size that can be used in a backwards compatible + way. Having a fixed-size small header page means we can get the real page + size of the file from the header page, but still be able to checksum the + header page without relying on unchecked page size field to compute the + checksum. + + (The remainder of the header page is just unused or could potentially + later be used for other data as needed). 
+*/ +static constexpr uint32_t IBB_HEADER_PAGE_SIZE= 512; +static constexpr uint32_t IBB_PAGE_SIZE_MIN= IBB_HEADER_PAGE_SIZE; +static constexpr uint32_t IBB_PAGE_SIZE_MAX= 65536; /** Store crc32 checksum at the end of the page */ #define BINLOG_PAGE_CHECKSUM 4 @@ -266,6 +286,8 @@ public: of the current binlog (ie. end-of-file). */ int read_data(byte *buffer, int max_len, bool multipage); + /* Read the file header of current file_no. */ + int get_file_header(binlog_header_data *out_header); /* Save current position, and restore it later. */ void save_pos(saved_position *out_pos) { *out_pos= s; } @@ -294,7 +316,8 @@ public: extern uint32_t ibb_page_size_shift; extern ulong ibb_page_size; -extern uint32_t current_binlog_state_interval; +/* The state interval (in pages) used for active_binlog_file_no. */ +extern uint64_t current_binlog_state_interval; extern mysql_mutex_t active_binlog_mutex; extern pthread_cond_t active_binlog_cond; extern std::atomic active_binlog_file_no; @@ -313,11 +336,18 @@ fsp_binlog_release(fsp_binlog_page_entry *page) extern size_t crc32_pwrite_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept; -extern size_t crc32_pread_page(File fd, byte *buf, uint32_t page_no, - myf MyFlags) noexcept; +extern int crc32_pread_page(File fd, byte *buf, uint32_t page_no, + myf MyFlags) noexcept; +extern int crc32_pread_page(pfs_os_file_t fh, byte *buf, uint32_t page_no, + myf MyFlags) noexcept; extern void binlog_write_up_to_now() noexcept; +extern void fsp_binlog_extract_header_page(const byte *page_buf, + binlog_header_data *out_header_data) + noexcept; extern void fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page, uint32_t page_offset, uint32_t len); +extern void fsp_log_header_page(mtr_t *mtr, fsp_binlog_page_entry *page, + uint32_t len) noexcept; extern void fsp_binlog_init(); extern void fsp_binlog_shutdown(); extern dberr_t fsp_binlog_tablespace_close(uint64_t file_no); diff --git a/storage/innobase/include/innodb_binlog.h 
b/storage/innobase/include/innodb_binlog.h index dcf4c7c0634..c460ddf91d0 100644 --- a/storage/innobase/include/innodb_binlog.h +++ b/storage/innobase/include/innodb_binlog.h @@ -76,8 +76,8 @@ struct chunk_data_flush : public chunk_data_base { /* Data stored at the start of each binlog file. - (The data is stored in the file as compressed integers; this is just a - struct to pass around the values in-memory). + (The data is stored as little-endian values in the first page of the file; + this is just a struct to pass around the values in-memory). */ struct binlog_header_data { /* @@ -93,16 +93,28 @@ struct binlog_header_data { */ uint64_t file_no; /* The length of this binlog file, in pages. */ - uint32_t page_count; + uint64_t page_count; /* The interval (in pages) at which the (differential) binlog GTID state is written into the binlog file, for faster GTID position search. This corresponds to the value of --innodb-binlog-state-interval at the time the binlog file was created. */ - uint32_t diff_state_interval; + uint64_t diff_state_interval; + /* The log_2 of the page size (eg. ibb_page_size_shift). */ + uint32_t page_size_shift; + /* + Major and minor file format version number. The idea is that minor version + increments are backwards compatible, major version upgrades are not. + */ + uint32_t vers_major, vers_minor; /* Whether the page was found empty. */ bool is_empty; + /* + Whether the page was found invalid, bad magic or major version, or CRC32 + error (and not empty).
+ */ + bool is_invalid; }; @@ -149,10 +161,12 @@ binlog_name_make_short(char *name_buf, uint64_t file_no) extern void innodb_binlog_startup_init(); extern bool innodb_binlog_init(size_t binlog_size, const char *directory); extern void innodb_binlog_close(bool shutdown); +extern bool ibb_write_header_page(mtr_t *mtr, uint64_t file_no, + uint64_t file_size_in_pages, lsn_t start_lsn, + uint64_t gtid_state_interval_in_pages); extern bool binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr, fsp_binlog_page_entry * &block, uint32_t &page_no, - uint32_t &page_offset, uint64_t file_no, - uint32_t file_size_in_pages); + uint32_t &page_offset, uint64_t file_no); extern bool innodb_binlog_oob(THD *thd, const unsigned char *data, size_t data_len, void **engine_data); extern void innodb_free_oob(THD *thd, void *engine_data);