1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-34705: Binlog-in-engine: Implement file header page

Now the first page of each binlog tablespace file is reserved as a file
header, replacing the use of extra fields in the first gtid state record of
the file. The header is primarily used during recovery, especially to get
the file LSN before which no redo should be applied to the file.

Using a dedicated page makes it possible to durably sync the file header to
disk after RESET MASTER (and at first server startup) and not have it
overwritten (and potentially corrupted) later; this guarantees that the
recovery will have at least one file header to look at to determine from
which LSN to apply redo records.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen
2025-04-02 10:21:08 +02:00
parent 21751e21f1
commit e1055af14f
7 changed files with 406 additions and 196 deletions

View File

@@ -18,11 +18,11 @@ binlog-000001.ibb 262144
FLUSH BINARY LOGS;
SHOW BINARY LOGS;
Log_name File_size
binlog-000000.ibb 36864
binlog-000000.ibb 40960
binlog-000001.ibb 262144
binlog-000002.ibb 262144
SET STATEMENT sql_log_bin=0 FOR
CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 0");
CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 1");
FLUSH BINARY LOGS;
FLUSH BINARY LOGS;
SHOW BINLOG EVENTS IN 'binlog-000000.ibb' LIMIT 1;
@@ -83,7 +83,7 @@ binlog-000022.ibb 262144
binlog-000023.ibb 262144
binlog-000024.ibb 262144
SET @now= NOW();
*** Do 187 inserts ...
*** Do 149 inserts ...
PURGE BINARY LOGS BEFORE @now;
SHOW BINARY LOGS;
Log_name File_size

View File

@@ -27,22 +27,23 @@ FLUSH BINARY LOGS;
SHOW BINARY LOGS;
# Flush couple logs so we are sure the first file is on disk.
# Corrupt one bit in the first page of the first file to test that crc32
# Corrupt one bit in the first data page of the first file to test that crc32
# mismatch is caught.
SET STATEMENT sql_log_bin=0 FOR
CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 0");
CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 1");
FLUSH BINARY LOGS;
FLUSH BINARY LOGS;
--let $file= binlog-000000.ibb
--let $datadir= `SELECT @@datadir`
--let BINLOG_FILE= $datadir/$file
perl;
my $pos= 4096 + 50; # Early byte in page 1 (page 0 is file header).
open F, '+<', $ENV{BINLOG_FILE} or die $!;
sysseek F, 50, 0 or die $!;
sysseek F, $pos, 0 or die $!;
my $x;
sysread F, $x, 1 or die $!;
$x= chr(ord($x) ^ (1 <<3));
sysseek F, 50, 0 or die $!;
sysseek F, $pos, 0 or die $!;
syswrite F, $x, 1 or die $!;
EOF
@@ -146,7 +147,7 @@ SHOW BINARY LOGS;
--sleep 1
SET @now= NOW();
--sleep 1
--let $num_insert= `SELECT floor(256*1.5*1024/2100)`
--let $num_insert= `SELECT floor(256*1.2*1024/2100)`
--echo *** Do $num_insert inserts ...
--disable_query_log
BEGIN;

View File

@@ -57,6 +57,6 @@ EOF
--source include/wait_until_connected_again.inc
--let $binlog_file=
--let $binlog_start= 0
--let $binlog_start= 4
--source include/show_binlog_events.inc
DROP TABLE t1;

View File

@@ -58,9 +58,10 @@ ulong ibb_page_size= (1 << ibb_page_size_shift);
This value must be used over the setting innodb_binlog_state_interval,
because after a restart the latest binlog file will be using the value of the
setting prior to the restart; the new value of the setting (if different)
will be used for newly created binlog files.
will be used for newly created binlog files. The value refers to the file
of active_binlog_file_no.
*/
uint32_t current_binlog_state_interval;
uint64_t current_binlog_state_interval;
/*
Mutex protecting active_binlog_file_no.
@@ -583,7 +584,7 @@ fsp_binlog_page_fifo::flush_thread_run()
if (all_flushed && file_no <= first_file_no)
all_flushed= flush_one_page(file_no + 1, false);
}
if (all_flushed)
if (all_flushed && !flush_thread_end)
my_cond_wait(&m_cond, &m_mutex.m_mutex);
}
@@ -597,21 +598,30 @@ size_t
crc32_pwrite_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept
{
const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM;
mach_write_to_4(buf + payload, my_crc32c(0, buf, payload));
int4store(buf + payload, my_crc32c(0, buf, payload));
return my_pwrite(fd, (const uchar *)buf, ibb_page_size,
(my_off_t)page_no << ibb_page_size_shift, MyFlags);
}
size_t
/*
Read a page, with CRC check.
Returns:
-1 error
0 EOF
1 Ok
*/
int
crc32_pread_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept
{
size_t res= my_pread(fd, buf, ibb_page_size,
size_t read= my_pread(fd, buf, ibb_page_size,
(my_off_t)page_no << ibb_page_size_shift, MyFlags);
if (UNIV_LIKELY(res == ibb_page_size))
int res= 1;
if (UNIV_LIKELY(read == ibb_page_size))
{
const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM;
uint32_t crc32= mach_read_from_4(buf + payload);
uint32_t crc32= uint4korr(buf + payload);
/* Allow a completely zero (empty) page as well. */
if (UNIV_UNLIKELY(crc32 != my_crc32c(0, buf, payload)) &&
(buf[0] != 0 || 0 != memcmp(buf, buf+1, ibb_page_size - 1)))
@@ -624,10 +634,46 @@ crc32_pread_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept
page_no, crc32);
}
}
else if (read == (size_t)-1)
res= -1;
else
res= 0;
return res;
}
int
crc32_pread_page(pfs_os_file_t fh, byte *buf, uint32_t page_no, myf MyFlags)
noexcept
{
const uint32_t page_size= (uint32_t)ibb_page_size;
ulint bytes_read= 0;
dberr_t err= os_file_read(IORequestRead, fh, buf,
(os_offset_t)page_no << ibb_page_size_shift,
page_size, &bytes_read);
if (UNIV_UNLIKELY(err != DB_SUCCESS))
return -1;
else if (UNIV_UNLIKELY(bytes_read < page_size))
return 0;
const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM;
uint32_t crc32= uint4korr(buf + payload);
/* Allow a completely zero (empty) page as well. */
if (UNIV_UNLIKELY(crc32 != my_crc32c(0, buf, payload)) &&
(buf[0] != 0 || 0 != memcmp(buf, buf+1, ibb_page_size - 1)))
{
my_errno= EIO;
if (MyFlags & MY_WME)
sql_print_error("InnoDB: Page corruption in binlog tablespace file "
"page number %u (invalid crc32 checksum 0x%08X)",
page_no, crc32);
return -1;
}
return 1;
}
void
binlog_write_up_to_now() noexcept
{
@@ -654,6 +700,36 @@ binlog_write_up_to_now() noexcept
}
void
fsp_binlog_extract_header_page(const byte *page_buf,
binlog_header_data *out_header_data) noexcept
{
uint32_t magic= uint4korr(page_buf);
uint32_t vers_major= uint4korr(page_buf + 8);
const uint32_t payload= IBB_HEADER_PAGE_SIZE - BINLOG_PAGE_CHECKSUM;
uint32_t crc32= uint4korr(page_buf + payload);
out_header_data->is_empty= false;
out_header_data->is_invalid= false;
if (crc32 != my_crc32c(0, page_buf, payload) ||
magic != IBB_MAGIC || vers_major > IBB_FILE_VERS_MAJOR)
{
if (page_buf[0] == 0 &&
0 == memcmp(page_buf, page_buf+1, IBB_HEADER_PAGE_SIZE - 1))
out_header_data->is_empty= true;
else
out_header_data->is_invalid= true;
return;
}
out_header_data->page_size_shift= uint4korr(page_buf + 4);
out_header_data->vers_major= vers_major;
out_header_data->vers_minor= uint4korr(page_buf + 12);
out_header_data->file_no= uint8korr(page_buf + 16);
out_header_data-> page_count= uint8korr(page_buf + 24);
out_header_data-> start_lsn= uint8korr(page_buf + 32);
out_header_data-> diff_state_interval= uint8korr(page_buf + 40);
}
void
fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
uint32_t page_offset, uint32_t len)
@@ -678,6 +754,19 @@ fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
page_offset + &page->page_buf[0], len);
}
void
fsp_log_header_page(mtr_t *mtr, fsp_binlog_page_entry *page, uint32_t len)
noexcept
{
uint64_t file_no= page->file_no;
uint32_t page_no= page->page_no;
ut_ad(page_no == 0);
page->complete= true;
mtr->write_binlog((file_no & 1), page_no, 0, &page->page_buf[0], len);
}
/*
Initialize the InnoDB implementation of binlog.
Note that we do not create or open any binlog tablespaces here.
@@ -849,6 +938,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
byte cont_flag= 0;
for (;;) {
if (page_offset == BINLOG_PAGE_DATA) {
ut_ad(!block);
uint32_t file_size_in_pages= binlog_page_fifo->size_in_pages(file_no);
if (UNIV_UNLIKELY(page_no >= file_size_in_pages)) {
/*
@@ -859,7 +949,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
The normal case is that the next tablespace is already pre-allocated
and available; binlog tablespace N is active while (N+1) is being
pre-allocated. Only under extreme I/O pressure should be need to
pre-allocated. Only under extreme I/O pressure should we need to
stall here.
*/
ut_ad(!pending_prev_end_offset);
@@ -873,14 +963,24 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
// ToDo: assert that a single write doesn't span more than two binlog files.
++file_no;
file_size_in_pages= binlog_page_fifo->size_in_pages(file_no);
binlog_cur_written_offset[file_no & 1].store(0, std::memory_order_relaxed);
binlog_cur_end_offset[file_no & 1].store(0, std::memory_order_relaxed);
pthread_cond_signal(&active_binlog_cond);
mysql_mutex_unlock(&active_binlog_mutex);
binlog_cur_page_no= page_no= 0;
/* ToDo: Here we must use the value from the file, if this file was pre-allocated before a server restart where the value of innodb_binlog_state_interval changed. Maybe just make innodb_binlog_state_interval dynamic and make the prealloc thread (and discover code at startup) supply the correct value to use for each file. */
current_binlog_state_interval=
(uint32_t)(innodb_binlog_state_interval >> page_size_shift);
(uint64_t)(innodb_binlog_state_interval >> page_size_shift);
}
/* Write the header page at the start of a binlog tablespace file. */
if (page_no == 0)
{
lsn_t start_lsn= log_sys.get_lsn(std::memory_order_acquire);
bool err= ibb_write_header_page(mtr, file_no, file_size_in_pages,
start_lsn, current_binlog_state_interval);
ut_a(!err /* ToDo error handling */);
page_no= 1;
}
/* Must be a power of two. */
@@ -888,14 +988,15 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
current_binlog_state_interval ==
(uint64_t)1 << (63 - nlz(current_binlog_state_interval)));
if (0 == (page_no & (current_binlog_state_interval - 1))) {
if (page_no == 0) {
if (page_no == 1 ||
0 == (page_no & (current_binlog_state_interval - 1))) {
if (page_no == 1) {
rpl_binlog_state_base full_state;
bool err;
full_state.init();
err= load_global_binlog_state(&full_state);
ut_a(!err /* ToDo error handling */);
if (UNIV_UNLIKELY(file_no == 0 && page_no == 0) &&
if (UNIV_UNLIKELY(file_no == 0 && page_no == 1) &&
(full_state.count_nolock() == 1))
{
/*
@@ -932,14 +1033,14 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
}
}
err= binlog_gtid_state(&full_state, mtr, block, page_no,
page_offset, file_no, file_size_in_pages);
page_offset, file_no);
ut_a(!err /* ToDo error handling */);
ut_ad(block);
full_state.free();
binlog_diff_state.reset_nolock();
} else {
bool err= binlog_gtid_state(&binlog_diff_state, mtr, block, page_no,
page_offset, file_no, file_size_in_pages);
page_offset, file_no);
ut_a(!err /* ToDo error handling */);
}
} else
@@ -1256,15 +1357,15 @@ binlog_chunk_reader::fetch_current_page()
cur_file_length= ~(uint64_t)0;
}
++s.file_no;
s.page_no= 0;
s.page_no= 1; /* Skip the header page. */
continue;
}
size_t res= crc32_pread_page(cur_file_handle, page_buffer, s.page_no,
int res= crc32_pread_page(cur_file_handle, page_buffer, s.page_no,
MYF(MY_WME));
if (res == (size_t)-1)
if (res < 0)
return CHUNK_READER_ERROR;
if (res == 0 && my_errno == HA_ERR_FILE_TOO_SHORT)
if (res == 0)
goto goto_next_file;
page_ptr= page_buffer;
return CHUNK_READER_FOUND;
@@ -1459,7 +1560,7 @@ go_next_page:
cur_file_handle= (File)-1;
cur_file_length= ~(uint64_t)0;
++s.file_no;
s.page_no= 0;
s.page_no= 1; /* Skip the header page. */
}
}
@@ -1470,6 +1571,19 @@ go_next_page:
}
int
binlog_chunk_reader::get_file_header(binlog_header_data *out_header)
{
seek(current_file_no(), 0);
if (fetch_current_page() != CHUNK_READER_FOUND)
return -1;
fsp_binlog_extract_header_page(page_ptr, out_header);
if (out_header->is_invalid || out_header->is_empty)
return -1;
return 0;
}
void
binlog_chunk_reader::restore_pos(binlog_chunk_reader::saved_position *pos)
{

View File

@@ -389,8 +389,7 @@ public:
~gtid_search();
enum Read_Result read_gtid_state_file_no(rpl_binlog_state_base *state,
uint64_t file_no, uint32_t page_no,
uint64_t *out_file_end,
uint32_t *out_diff_state_interval);
uint64_t *out_file_end);
int find_gtid_pos(slave_connection_state *pos,
rpl_binlog_state_base *out_state, uint64_t *out_file_no,
uint64_t *out_offset);
@@ -491,7 +490,7 @@ public:
bool close_file() noexcept;
bool next_file() noexcept;
bool next_page() noexcept;
void update_page_from_record(uint16_t offset,
bool update_page_from_record(uint16_t offset,
const byte *buf, size_t size) noexcept;
};
@@ -506,8 +505,8 @@ static int innodb_binlog_discover();
static bool binlog_state_recover();
static void innodb_binlog_autopurge(uint64_t first_open_file_no);
static int read_gtid_state_from_page(rpl_binlog_state_base *state,
const byte *page, uint32_t page_no,
binlog_header_data *out_header_data);
const byte *page, uint32_t page_no)
noexcept;
/*
@@ -526,7 +525,6 @@ binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty)
noexcept
{
char full_path[OS_FILE_MAX_PATH];
rpl_binlog_state_base dummy_state;
binlog_header_data header;
out_empty= true;
@@ -547,14 +545,13 @@ binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty)
it as an empty file.
*/
const uint32_t payload= (uint32_t)ibb_page_size - BINLOG_PAGE_CHECKSUM;
uint32_t crc32= mach_read_from_4(page_buf + payload);
uint32_t crc32= uint4korr(page_buf + payload);
if (UNIV_UNLIKELY(crc32 != my_crc32c(0, page_buf, payload)))
return 0;
dummy_state.init();
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
if (res <= 0)
return res;
fsp_binlog_extract_header_page(page_buf, &header);
if (header.is_invalid)
return 0;
if (!header.is_empty)
{
out_empty= false;
@@ -724,8 +721,8 @@ binlog_recovery::init_recovery_from(uint64_t file_no, lsn_t file_lsn,
skipping_early_lsn= false;
if (offset <= BINLOG_PAGE_DATA)
{
update_page_from_record(offset, buf, size);
skipping_partial_page= false;
return update_page_from_record(offset, buf, size);
}
}
return false;
@@ -852,21 +849,23 @@ binlog_recovery::zero_out_cur_file()
return;
/* Recover the original size from the current file. */
size_t read= crc32_pread_page(cur_file_fh, page_buf, 0, MYF(0));
if (read != (size_t)ibb_page_size)
int res= crc32_pread_page(cur_file_fh, page_buf, 0, MYF(0));
if (res <= 0)
{
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
return;
}
binlog_header_data header;
rpl_binlog_state_base dummy_state;
dummy_state.init();
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
if (res <= 0)
fsp_binlog_extract_header_page(page_buf, &header);
if (header.is_invalid)
{
sql_print_warning("InnoDB: Invalid header page in last binlog file "
"during recovery");
return;
}
if (header.is_empty)
{
if (res < 0)
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
else
sql_print_warning("InnoDB: Empty binlog file header found during recovery");
ut_ad(0);
return;
@@ -1040,17 +1039,56 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
if (offset + size >= ibb_page_size)
return !srv_force_recovery;
update_page_from_record(offset, buf, size);
return false;
return update_page_from_record(offset, buf, size);
}
void
bool
binlog_recovery::update_page_from_record(uint16_t offset,
const byte *buf, size_t size) noexcept
{
memcpy(page_buf + offset, buf, size);
if (cur_page_no == 0 && offset == 0)
{
binlog_header_data header;
/*
This recovery record is for the file header page.
This record is special, it covers only the used part of the header page.
The reaminder of the page must be set to zeroes.
Additionally, there is an extra CRC corresponding to a minimum
page size of IBB_PAGE_SIZE_MIN, in anticipation for future configurable
page size.
*/
memset(page_buf + size, 0, ibb_page_size - (size + BINLOG_PAGE_DATA_END));
cur_page_offset= (uint32_t)ibb_page_size - BINLOG_PAGE_DATA_END;
uint32_t payload= IBB_HEADER_PAGE_SIZE - BINLOG_PAGE_CHECKSUM;
int4store(page_buf + payload, my_crc32c(0, page_buf, payload));
fsp_binlog_extract_header_page(page_buf, &header);
if (header.is_invalid)
{
sql_print_error("InnoDB: Corrupt or invalid file header found during "
"recovery of file number %" PRIu64, cur_file_no);
return !srv_force_recovery;
}
if (header.is_empty)
{
sql_print_error("InnoDB: Empty file header found during "
"recovery of file number %" PRIu64, cur_file_no);
return !srv_force_recovery;
}
if (header.file_no != cur_file_no)
{
sql_print_error("InnoDB: Inconsistency in file header during recovery. "
"The header in file number %" PRIu64 " is for file "
"number %" PRIu64, cur_file_no, header.file_no);
return !srv_force_recovery;
}
return false;
}
cur_page_offset= offset + (uint32_t)size;
return false;
}
@@ -1107,7 +1145,7 @@ innodb_binlog_init_state()
binlog_cur_page_no= 0;
binlog_cur_page_offset= BINLOG_PAGE_DATA;
current_binlog_state_interval=
(uint32_t)(innodb_binlog_state_interval >> ibb_page_size_shift);
(uint64_t)(innodb_binlog_state_interval >> ibb_page_size_shift);
ut_a(innodb_binlog_state_interval ==
(current_binlog_state_interval << ibb_page_size_shift));
}
@@ -1149,6 +1187,7 @@ binlog_sync_initial()
mtr.commit();
log_buffer_flush_to_disk(true);
binlog_page_fifo->flush_up_to(0, 0);
binlog_page_fifo->do_fdatasync(0);
}
@@ -1300,41 +1339,57 @@ binlog_page_empty(const byte *page)
static int
find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf,
uint32_t *out_page_no, uint32_t *out_pos_in_page)
uint32_t *out_page_no, uint32_t *out_pos_in_page,
uint64_t *out_state_interval)
{
const uint32_t page_size= (uint32_t)ibb_page_size;
const uint32_t page_size_shift= (uint32_t)ibb_page_size_shift;
const uint32_t idx= file_no & 1;
char file_name[OS_FILE_MAX_PATH];
uint32_t p_0, p_1, p_2, last_nonempty;
dberr_t err;
byte *p, *page_end;
bool ret;
binlog_header_data header_data;
*out_page_no= 0;
*out_pos_in_page= BINLOG_PAGE_DATA;
*out_state_interval= 0;
binlog_name_make(file_name, file_no);
pfs_os_file_t fh= os_file_create(innodb_data_file_key, file_name,
OS_FILE_OPEN, OS_DATA_FILE,
srv_read_only_mode, &ret);
if (!ret) {
sql_print_warning("Unable to open file '%s'", file_name);
sql_print_warning("InnoDB: Unable to open file '%s'", file_name);
return -1;
}
err= os_file_read(IORequestRead, fh, page_buf, 0, page_size, nullptr);
if (err != DB_SUCCESS) {
int res= crc32_pread_page(fh, page_buf, 0, MYF(MY_WME));
if (res <= 0) {
os_file_close(fh);
return -1;
}
if (binlog_page_empty(page_buf)) {
fsp_binlog_extract_header_page(page_buf, &header_data);
if (header_data.is_invalid)
{
sql_print_error("InnoDB: Invalid or corrupt file header in file "
"'%s'", file_name);
return -1;
}
if (header_data.is_empty) {
ret=
fsp_binlog_open(file_name, fh, file_no, file_size, ~(uint32_t)0, nullptr);
binlog_cur_written_offset[idx].store(0, std::memory_order_relaxed);
binlog_cur_end_offset[idx].store(0, std::memory_order_relaxed);
return (ret ? -1 : 0);
}
if (header_data.file_no != file_no)
{
sql_print_error("InnoDB: Inconsistent file header in file '%s', "
"wrong file_no %" PRIu64, file_name, header_data.file_no);
return -1;
}
*out_state_interval= header_data.diff_state_interval;
last_nonempty= 0;
/*
@@ -1348,9 +1403,8 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf,
break;
ut_ad(p_0 < p_2);
p_1= (p_0 + p_2) / 2;
err= os_file_read(IORequestRead, fh, page_buf, p_1 << page_size_shift,
page_size, nullptr);
if (err != DB_SUCCESS) {
res= crc32_pread_page(fh, page_buf, p_1, MYF(MY_WME));
if (res <= 0) {
os_file_close(fh);
return -1;
}
@@ -1368,9 +1422,8 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf,
This sometimes does an extra read, but as this is only during startup it
does not matter.
*/
err= os_file_read(IORequestRead, fh, page_buf,
last_nonempty << page_size_shift, page_size, nullptr);
if (err != DB_SUCCESS) {
res= crc32_pread_page(fh, page_buf, last_nonempty, MYF(MY_WME));
if (res <= 0) {
os_file_close(fh);
return -1;
}
@@ -1416,6 +1469,7 @@ innodb_binlog_discover()
const uint32_t page_size= (uint32_t)ibb_page_size;
const uint32_t page_size_shift= (uint32_t)ibb_page_size_shift;
struct found_binlogs UNINIT_VAR(binlog_files);
uint64_t diff_state_interval;
int res= scan_for_binlogs(innodb_binlog_directory, &binlog_files, false);
if (res <= 0)
@@ -1437,10 +1491,12 @@ innodb_binlog_discover()
res= find_pos_in_binlog(binlog_files.last_file_no,
binlog_files.last_size,
page_buf.get(), &page_no, &pos_in_page);
page_buf.get(), &page_no, &pos_in_page,
&diff_state_interval);
if (res < 0) {
file_no= binlog_files.last_file_no;
active_binlog_file_no.store(file_no, std::memory_order_release);
current_binlog_state_interval= innodb_binlog_state_interval;
sql_print_warning("Binlog number %llu could no be opened. Starting a new "
"binlog file from number %llu",
binlog_files.last_file_no, (file_no + 1));
@@ -1451,6 +1507,7 @@ innodb_binlog_discover()
/* Found start position in the last binlog file. */
file_no= binlog_files.last_file_no;
active_binlog_file_no.store(file_no, std::memory_order_release);
current_binlog_state_interval= diff_state_interval;
binlog_cur_page_no= page_no;
binlog_cur_page_offset= pos_in_page;
ib::info() << "Continuing binlog number " << file_no << " from position "
@@ -1465,10 +1522,12 @@ innodb_binlog_discover()
res= find_pos_in_binlog(binlog_files.prev_file_no,
binlog_files.prev_size,
page_buf.get(),
&prev_page_no, &prev_pos_in_page);
&prev_page_no, &prev_pos_in_page,
&diff_state_interval);
if (res < 0) {
file_no= binlog_files.last_file_no;
active_binlog_file_no.store(file_no, std::memory_order_release);
current_binlog_state_interval= innodb_binlog_state_interval;
binlog_cur_page_no= page_no;
binlog_cur_page_offset= pos_in_page;
sql_print_warning("Binlog number %llu could not be opened, starting "
@@ -1478,6 +1537,7 @@ innodb_binlog_discover()
}
file_no= binlog_files.prev_file_no;
active_binlog_file_no.store(file_no, std::memory_order_release);
current_binlog_state_interval= diff_state_interval;
binlog_cur_page_no= prev_page_no;
binlog_cur_page_offset= prev_pos_in_page;
ib::info() << "Continuing binlog number " << file_no << " from position "
@@ -1490,6 +1550,7 @@ innodb_binlog_discover()
/* Just one empty binlog file found. */
file_no= binlog_files.last_file_no;
active_binlog_file_no.store(file_no, std::memory_order_release);
current_binlog_state_interval= innodb_binlog_state_interval;
binlog_cur_page_no= page_no;
binlog_cur_page_offset= pos_in_page;
ib::info() << "Continuing binlog number " << file_no << " from position "
@@ -1501,6 +1562,7 @@ innodb_binlog_discover()
file_no= 0;
earliest_binlog_file_no= 0;
total_binlog_used_size= 0;
current_binlog_state_interval= innodb_binlog_state_interval;
ib::info() << "Starting a new binlog from file number " << file_no << ".";
return 0;
}
@@ -1532,8 +1594,8 @@ void innodb_binlog_close(bool shutdown)
if (shutdown && innodb_binlog_inited >= 1)
{
binlog_diff_state.free();
mysql_mutex_destroy(&purge_binlog_mutex);
fsp_binlog_shutdown();
mysql_mutex_destroy(&purge_binlog_mutex);
}
}
@@ -1624,38 +1686,56 @@ innodb_binlog_prealloc_thread()
}
bool
ibb_write_header_page(mtr_t *mtr, uint64_t file_no, uint64_t file_size_in_pages,
lsn_t start_lsn, uint64_t gtid_state_interval_in_pages)
{
fsp_binlog_page_entry *block;
uint32_t used_bytes;
block= binlog_page_fifo->create_page(file_no, 0);
ut_a(block /* ToDo: error handling? */);
byte *ptr= &block->page_buf[0];
int4store(ptr, IBB_MAGIC);
int4store(ptr + 4, ibb_page_size_shift);
int4store(ptr + 8, IBB_FILE_VERS_MAJOR);
int4store(ptr + 12, IBB_FILE_VERS_MINOR);
int8store(ptr + 16, file_no);
int8store(ptr + 24, file_size_in_pages);
int8store(ptr + 32, start_lsn);
int8store(ptr + 40, gtid_state_interval_in_pages);
used_bytes= 48;
ut_ad(ibb_page_size >= IBB_HEADER_PAGE_SIZE);
memset(ptr + used_bytes, 0, ibb_page_size - (used_bytes + BINLOG_PAGE_CHECKSUM));
/*
For future expansion with configurable page size:
Write a CRC32 at the end of the minimal page size. This way, the header
page can be read and checksummed without knowing the page size used in
the file, and then the actual page size can be obtained from the header
page.
*/
const uint32_t payload= IBB_HEADER_PAGE_SIZE - BINLOG_PAGE_CHECKSUM;
int4store(ptr + payload, my_crc32c(0, ptr, payload));
fsp_log_header_page(mtr, block, used_bytes);
binlog_page_fifo->release_page_mtr(block, mtr);
return false; // No error
}
__attribute__((noinline))
static ssize_t
serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size,
uint32_t file_size_in_pages, uint64_t file_no,
bool is_first_page)
serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size)
noexcept
{
unsigned char *p= (unsigned char *)buf;
/*
1 uint64_t for the current LSN at start of binlog file.
1 uint64_t for the file_no.
1 uint32_t for the file size in pages.
1 uint32_t for the innodb_binlog_state_interval in pages.
1 uint64_t for the number of entries in the state stored.
2 uint32_t + 1 uint64_t for at least one GTID.
*/
ut_ad(buf_size >= 4*COMPR_INT_MAX32 + 4*COMPR_INT_MAX64);
if (is_first_page) {
/*
In the first page where we put the full state, include the value of the
setting for the interval at which differential states are binlogged, so
we know how to search them independent of how the setting changes.
We also include the current LSN for recovery purposes; and the file
length and file_no, which is also useful if we have to recover the whole
file from the redo log after a crash.
*/
p= compr_int_write(p, log_sys.get_lsn(std::memory_order_acquire));
p= compr_int_write(p, file_no);
p= compr_int_write(p, file_size_in_pages);
/* ToDo: Check that this current_binlog_state_interval is the correct value! */
p= compr_int_write(p, current_binlog_state_interval);
}
ut_ad(buf_size >= 2*COMPR_INT_MAX32 + 2*COMPR_INT_MAX64);
p= compr_int_write(p, state->count_nolock());
unsigned char * const pmax=
p + (buf_size - (2*COMPR_INT_MAX32 + COMPR_INT_MAX64));
@@ -1678,8 +1758,7 @@ serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size,
bool
binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
fsp_binlog_page_entry * &block, uint32_t &page_no,
uint32_t &page_offset, uint64_t file_no,
uint32_t file_size_in_pages)
uint32_t &page_offset, uint64_t file_no)
{
/*
Use a small, efficient stack-allocated buffer by default, falling back to
@@ -1690,9 +1769,7 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
uint32_t block_page_no= ~(uint32_t)0;
block= nullptr;
ssize_t used_bytes= serialize_gtid_state(state, small_buf, sizeof(small_buf),
file_size_in_pages, file_no,
page_no==0);
ssize_t used_bytes= serialize_gtid_state(state, small_buf, sizeof(small_buf));
if (used_bytes >= 0)
{
buf= small_buf;
@@ -1706,8 +1783,7 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
if (UNIV_UNLIKELY(!alloced_buf))
return true;
buf= alloced_buf;
used_bytes= serialize_gtid_state(state, buf, buf_size, file_size_in_pages,
file_no, page_no==0);
used_bytes= serialize_gtid_state(state, buf, buf_size);
if (UNIV_UNLIKELY(used_bytes < 0))
{
ut_ad(0 /* Shouldn't happen, as we allocated maximum needed size. */);
@@ -1722,6 +1798,8 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
/* For now, GTID state always at the start of a page. */
ut_ad(page_offset == BINLOG_PAGE_DATA);
/* Page 0 is reserved for the header page. */
ut_ad(page_no != 0);
/*
Only write the GTID state record if there is room for actual event data
@@ -1792,16 +1870,12 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
*/
static int
read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
uint32_t page_no, binlog_header_data *out_header_data)
uint32_t page_no) noexcept
{
const byte *p= page + BINLOG_PAGE_DATA;
byte t= *p;
if (UNIV_UNLIKELY((t & FSP_BINLOG_TYPE_MASK) != FSP_BINLOG_TYPE_GTID_STATE))
{
out_header_data->is_empty= binlog_page_empty(page);
return 0;
}
out_header_data->is_empty= false;
/* ToDo: Handle reading a state that spans multiple pages. For now, we assume the state fits in a single page. */
ut_a(t & FSP_BINLOG_FLAG_LAST);
@@ -1811,42 +1885,6 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
return -1;
std::pair<uint64_t, const unsigned char *> v_and_p= compr_int_read(p + 3);
p= v_and_p.second;
if (page_no == 0)
{
/*
The state in the first page has four extra words: The start LSN of the
file; the file_no of the file; the file length, in pages; and the offset
between differential binlog states logged regularly in the binlog
tablespace.
*/
if (UNIV_UNLIKELY(p >= p_end))
return -1;
out_header_data->start_lsn= (uint32_t)v_and_p.first;
v_and_p= compr_int_read(p);
p= v_and_p.second;
if (UNIV_UNLIKELY(p >= p_end))
return -1;
out_header_data->file_no= v_and_p.first;
v_and_p= compr_int_read(p);
p= v_and_p.second;
if (UNIV_UNLIKELY(p >= p_end) || UNIV_UNLIKELY(v_and_p.first >= UINT32_MAX))
return -1;
out_header_data->page_count= (uint32_t)v_and_p.first;
v_and_p= compr_int_read(p);
p= v_and_p.second;
if (UNIV_UNLIKELY(p >= p_end) || UNIV_UNLIKELY(v_and_p.first >= UINT32_MAX))
return -1;
out_header_data->diff_state_interval= (uint32_t)v_and_p.first;
v_and_p= compr_int_read(p);
p= v_and_p.second;
}
else
{
out_header_data->start_lsn= 0;
out_header_data->file_no= ~(uint64_t)0;
out_header_data->page_count= 0;
out_header_data->diff_state_interval= 0;
}
if (UNIV_UNLIKELY(p > p_end))
return -1;
@@ -1899,8 +1937,7 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
-1 Error
*/
static int
read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no,
binlog_header_data *out_header_data)
read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no)
{
std::unique_ptr<byte [], void (*)(void *)> page_buf
((byte *)my_malloc(PSI_NOT_INSTRUMENTED, ibb_page_size, MYF(MY_WME)),
@@ -1909,12 +1946,11 @@ read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no,
return -1;
/* ToDo: Handle encryption. */
size_t res= crc32_pread_page(file, page_buf.get(), page_no, MYF(MY_WME));
if (UNIV_UNLIKELY(res == (size_t)-1))
int res= crc32_pread_page(file, page_buf.get(), page_no, MYF(MY_WME));
if (UNIV_UNLIKELY(res <= 0))
return -1;
return read_gtid_state_from_page(state, page_buf.get(), page_no,
out_header_data);
return read_gtid_state_from_page(state, page_buf.get(), page_no);
}
@@ -1929,40 +1965,36 @@ read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no,
static bool
binlog_state_recover()
{
binlog_header_data header_data;
rpl_binlog_state_base state;
state.init();
uint32_t diff_state_interval= 0;
uint32_t page_no= 0;
uint64_t active= active_binlog_file_no.load(std::memory_order_relaxed);
uint64_t diff_state_interval= current_binlog_state_interval;
uint32_t page_no= 1;
char filename[OS_FILE_MAX_PATH];
binlog_name_make(filename,
active_binlog_file_no.load(std::memory_order_relaxed));
binlog_name_make(filename, active);
File file= my_open(filename, O_RDONLY | O_BINARY, MYF(MY_WME));
if (UNIV_UNLIKELY(file < (File)0))
return true;
int res= read_gtid_state(&state, file, page_no, &header_data);
int res= read_gtid_state(&state, file, page_no);
if (res < 0)
{
my_close(file, MYF(0));
return true;
}
diff_state_interval= header_data.diff_state_interval;
if (diff_state_interval == 0)
{
sql_print_warning("Invalid differential binlog state interval %llu found "
"in binlog file, ignoring", diff_state_interval);
current_binlog_state_interval= 0; /* Disable in this binlog file */
}
else
{
current_binlog_state_interval= diff_state_interval;
page_no= (uint32_t)(binlog_cur_page_no -
(binlog_cur_page_no % diff_state_interval));
while (page_no > 0)
while (page_no > 1)
{
res= read_gtid_state(&state, file, page_no, &header_data);
res= read_gtid_state(&state, file, page_no);
if (res > 0)
break;
page_no-= (uint32_t)diff_state_interval;
@@ -1970,9 +2002,7 @@ binlog_state_recover()
}
my_close(file, MYF(0));
ha_innodb_binlog_reader reader(active_binlog_file_no.load
(std::memory_order_relaxed),
page_no << ibb_page_size_shift);
ha_innodb_binlog_reader reader(active, page_no << ibb_page_size_shift);
return binlog_recover_gtid_state(&state, &reader);
}
@@ -2410,6 +2440,8 @@ ha_innodb_binlog_reader::ha_innodb_binlog_reader(uint64_t file_no,
{
page_buf= (uchar *)ut_malloc(ibb_page_size, mem_key_binlog);
chunk_rd.set_page_buf(page_buf);
if (offset < ibb_page_size)
offset= ibb_page_size;
chunk_rd.seek(file_no, offset);
chunk_rd.skip_partial(true);
}
@@ -2617,10 +2649,8 @@ gtid_search::~gtid_search()
enum gtid_search::Read_Result
gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state,
uint64_t file_no, uint32_t page_no,
uint64_t *out_file_end,
uint32_t *out_diff_state_interval)
uint64_t *out_file_end)
{
binlog_header_data header_data;
*out_file_end= 0;
uint64_t active2= active_binlog_file_no.load(std::memory_order_acquire);
if (file_no > active2)
@@ -2675,9 +2705,7 @@ gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state,
if (block)
{
ut_ad(end_offset != ~(uint64_t)0);
int res= read_gtid_state_from_page(state, block->page_buf, page_no,
&header_data);
*out_diff_state_interval= header_data.diff_state_interval;
int res= read_gtid_state_from_page(state, block->page_buf, page_no);
binlog_page_fifo->release_page(block);
return (Read_Result)res;
}
@@ -2716,8 +2744,7 @@ gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state,
}
if (!*out_file_end)
*out_file_end= cur_open_file_length;
int res= read_gtid_state(state, cur_open_file, page_no, &header_data);
*out_diff_state_interval= header_data.diff_state_interval;
int res= read_gtid_state(state, cur_open_file, page_no);
return (Read_Result)res;
}
}
@@ -2735,7 +2762,6 @@ gtid_search::read_gtid_state_file_no(rpl_binlog_state_base *state,
0 Position not found (has been purged)
1 Position found
*/
int
gtid_search::find_gtid_pos(slave_connection_state *pos,
rpl_binlog_state_base *out_state,
@@ -2750,14 +2776,39 @@ gtid_search::find_gtid_pos(slave_connection_state *pos,
/* First search backwards for the right file to start from. */
uint64_t file_end= 0;
uint32_t diff_state_page_interval= 0;
uint64_t diff_state_page_interval= 0;
rpl_binlog_state_base base_state, page0_diff_state, tmp_diff_state;
base_state.init();
for (;;)
{
/*
Read the header page, needed to get the binlog diff state interval.
ToDo: Here we instantiate our own binlog_chunk_reader specifically for
this. Later, when read_gtid_state_file_no() is fixed to also use a
binlog_chunk_reader, integrate and use the same single
binlog_chunk_reader object.
*/
binlog_header_data header;
int err;
byte *page_buffer= (byte *)ut_malloc(ibb_page_size, mem_key_binlog);
if (!page_buffer)
{
my_error(ER_OUTOFMEMORY, MYF(0), ibb_page_size);
return -1;
}
{
binlog_chunk_reader chunk_reader;
chunk_reader.set_page_buf(page_buffer);
chunk_reader.seek(file_no, 0);
err= chunk_reader.get_file_header(&header);
diff_state_page_interval= header.diff_state_interval;
}
ut_free(page_buffer);
if (err)
return -1;
enum Read_Result res=
read_gtid_state_file_no(&base_state, file_no, 0, &file_end,
&diff_state_page_interval);
read_gtid_state_file_no(&base_state, file_no, 1, &file_end);
if (res == READ_ENOENT)
return 0;
if (res == READ_ERROR)
@@ -2769,7 +2820,7 @@ gtid_search::find_gtid_pos(slave_connection_state *pos,
/* Handle the special case of a completely empty binlog file. */
out_state->reset_nolock();
*out_file_no= file_no;
*out_offset= 0;
*out_offset= ibb_page_size;
return 1;
}
ut_ad(0 /* Not expected to find no state, should always be written. */);
@@ -2794,20 +2845,18 @@ gtid_search::find_gtid_pos(slave_connection_state *pos,
uint32_t page2= (uint32_t)
(diff_state_page_interval + ((file_end - 1) >> ibb_page_size_shift));
/* Round to the next diff_state_page_interval after file_end. */
page2-= page2 % diff_state_page_interval;
page2-= page2 % (uint32_t)diff_state_page_interval;
uint32_t page1= (page0 + page2) / 2;
page0_diff_state.init();
page0_diff_state.load_nolock(&base_state);
tmp_diff_state.init();
while (page1 >= page0 + diff_state_page_interval)
while (page1 >= page0 + diff_state_page_interval && page1 > 1)
{
ut_ad((page1 - page0) % diff_state_page_interval == 0);
tmp_diff_state.reset_nolock();
tmp_diff_state.load_nolock(&base_state);
uint32_t dummy;
enum Read_Result res=
read_gtid_state_file_no(&tmp_diff_state, file_no, page1, &file_end,
&dummy);
read_gtid_state_file_no(&tmp_diff_state, file_no, page1, &file_end);
if (res == READ_ENOENT)
return 0; /* File purged while we are reading from it? */
if (res == READ_ERROR)
@@ -2819,7 +2868,7 @@ gtid_search::find_gtid_pos(slave_connection_state *pos,
try the one just before. It will be safe, even if not always optimal,
and this is an abnormal situation anyway.
*/
page1= page1 - diff_state_page_interval;
page1= page1 - (uint32_t)diff_state_page_interval;
continue;
}
if (tmp_diff_state.is_before_pos(pos))
@@ -2835,6 +2884,8 @@ gtid_search::find_gtid_pos(slave_connection_state *pos,
ut_ad(page1 >= page0);
out_state->load_nolock(&page0_diff_state);
*out_file_no= file_no;
if (page0 == 0)
page0= 1; /* Skip the initial file header page. */
*out_offset= (uint64_t)page0 << ibb_page_size_shift;
return 1;
}
@@ -2888,6 +2939,8 @@ ha_innodb_binlog_reader::init_legacy_pos(const char *filename, ulonglong offset)
reached. This way we avoid reading garbaga data for invalid request
offset.
*/
if (offset < ibb_page_size)
offset= ibb_page_size;
chunk_rd.seek(file_no, (uint64_t)offset);
chunk_rd.skip_partial(true);
cur_file_no= chunk_rd.current_file_no();
@@ -2980,14 +3033,12 @@ innodb_binlog_get_init_state(rpl_binlog_state_base *out_state)
{
gtid_search search_obj;
uint64_t dummy_file_end;
uint32_t dummy_diff_state_interval;
bool err= false;
mysql_mutex_lock(&purge_binlog_mutex);
uint64_t file_no= earliest_binlog_file_no;
enum gtid_search::Read_Result res=
search_obj.read_gtid_state_file_no(out_state, file_no, 0, &dummy_file_end,
&dummy_diff_state_interval);
search_obj.read_gtid_state_file_no(out_state, file_no, 1, &dummy_file_end);
mysql_mutex_unlock(&purge_binlog_mutex);
if (res != gtid_search::READ_FOUND)
err= true;

View File

@@ -32,7 +32,27 @@ InnoDB implementation of binlog.
struct chunk_data_base;
struct binlog_header_data;
/* 4-byte "magic" identifying InnoDB binlog file (little endian). */
static constexpr uint32_t IBB_MAGIC= 0x010dfefe;
static constexpr uint32_t IBB_FILE_VERS_MAJOR= 0;
static constexpr uint32_t IBB_FILE_VERS_MINOR= 0;
/*
The size of the header page that is stored in the first page of a file.
This is the smallest page size that can be used in a backwards compatible
way. Having a fixed-size small header page means we can get the real page
size of the file from the header page, but still be able to checksum the
header page without relying on unchecked page size field to compute the
checksum.
(The remainder of the header page is just unused or could potentially
later be used for other data as needed).
*/
static constexpr uint32_t IBB_HEADER_PAGE_SIZE= 512;
static constexpr uint32_t IBB_PAGE_SIZE_MIN= IBB_HEADER_PAGE_SIZE;
static constexpr uint32_t IBB_PAGE_SIZE_MAX= 65536;
/** Store crc32 checksum at the end of the page */
#define BINLOG_PAGE_CHECKSUM 4
@@ -266,6 +286,8 @@ public:
of the current binlog (ie. end-of-file).
*/
int read_data(byte *buffer, int max_len, bool multipage);
/* Read the file header of current file_no. */
int get_file_header(binlog_header_data *out_header);
/* Save current position, and restore it later. */
void save_pos(saved_position *out_pos) { *out_pos= s; }
@@ -294,7 +316,8 @@ public:
extern uint32_t ibb_page_size_shift;
extern ulong ibb_page_size;
extern uint32_t current_binlog_state_interval;
/* The state interval (in pages) used for active_binlog_file_no. */
extern uint64_t current_binlog_state_interval;
extern mysql_mutex_t active_binlog_mutex;
extern pthread_cond_t active_binlog_cond;
extern std::atomic<uint64_t> active_binlog_file_no;
@@ -313,11 +336,18 @@ fsp_binlog_release(fsp_binlog_page_entry *page)
extern size_t crc32_pwrite_page(File fd, byte *buf, uint32_t page_no,
myf MyFlags) noexcept;
extern size_t crc32_pread_page(File fd, byte *buf, uint32_t page_no,
extern int crc32_pread_page(File fd, byte *buf, uint32_t page_no,
myf MyFlags) noexcept;
extern int crc32_pread_page(pfs_os_file_t fh, byte *buf, uint32_t page_no,
myf MyFlags) noexcept;
extern void binlog_write_up_to_now() noexcept;
extern void fsp_binlog_extract_header_page(const byte *page_buf,
binlog_header_data *out_header_data)
noexcept;
extern void fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
uint32_t page_offset, uint32_t len);
extern void fsp_log_header_page(mtr_t *mtr, fsp_binlog_page_entry *page,
uint32_t len) noexcept;
extern void fsp_binlog_init();
extern void fsp_binlog_shutdown();
extern dberr_t fsp_binlog_tablespace_close(uint64_t file_no);

View File

@@ -76,8 +76,8 @@ struct chunk_data_flush : public chunk_data_base {
/*
Data stored at the start of each binlog file.
(The data is stored in the file as compressed integers; this is just a
struct to pass around the values in-memory).
(The data is stored as little-engian values in the first page of the file;
this is just a struct to pass around the values in-memory).
*/
struct binlog_header_data {
/*
@@ -93,16 +93,28 @@ struct binlog_header_data {
*/
uint64_t file_no;
/* The length of this binlog file, in pages. */
uint32_t page_count;
uint64_t page_count;
/*
The interval (in pages) at which the (differential) binlog GTID state is
written into the binlog file, for faster GTID position search. This
corresponds to the value of --innodb-binlog-state-interval at the time the
binlog file was created.
*/
uint32_t diff_state_interval;
uint64_t diff_state_interval;
/* The log_2 of the page size (eg. ibb_page_size_shift). */
uint32_t page_size_shift;
/*
Major and minor file format version number. The idea is that minor version
increments are backwards compatible, major version upgrades are not.
*/
uint32_t vers_major, vers_minor;
/* Whether the page was found empty. */
bool is_empty;
/*
Whether the page was found invalid, bad magic or major version, or CRC32
error (and not empty).
*/
bool is_invalid;
};
@@ -149,10 +161,12 @@ binlog_name_make_short(char *name_buf, uint64_t file_no)
extern void innodb_binlog_startup_init();
extern bool innodb_binlog_init(size_t binlog_size, const char *directory);
extern void innodb_binlog_close(bool shutdown);
extern bool ibb_write_header_page(mtr_t *mtr, uint64_t file_no,
uint64_t file_size_in_pages, lsn_t start_lsn,
uint64_t gtid_state_interval_in_pages);
extern bool binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
fsp_binlog_page_entry * &block, uint32_t &page_no,
uint32_t &page_offset, uint64_t file_no,
uint32_t file_size_in_pages);
uint32_t &page_offset, uint64_t file_no);
extern bool innodb_binlog_oob(THD *thd, const unsigned char *data,
size_t data_len, void **engine_data);
extern void innodb_free_oob(THD *thd, void *engine_data);