1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-34705: Binlog-in-engine: First working recovery

Still needs more testing.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen
2025-03-12 16:57:42 +01:00
parent 1582a6d885
commit b3c6bbdbd3
9 changed files with 934 additions and 129 deletions

View File

@@ -16,25 +16,25 @@ SELECT @@GLOBAL.binlog_checksum;
NONE
SHOW MASTER STATUS;
File Position Binlog_Do_DB Binlog_Ignore_DB
binlog-000000.ibb 767
SHOW BINLOG EVENTS IN "binlog-000000.ibb";
binlog-000000.ibb #
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
binlog-000000.ibb 0 Gtid 1 0 GTID 0-1-1
binlog-000000.ibb 0 Query 1 0 use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB
binlog-000000.ibb 0 Gtid 1 0 BEGIN GTID 0-1-2
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (1)
binlog-000000.ibb 0 Xid 1 0 COMMIT /* xid=34 */
binlog-000000.ibb 0 Gtid 1 0 BEGIN GTID 0-1-3
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (2)
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (3)
binlog-000000.ibb 0 Xid 1 0 COMMIT /* xid=36 */
binlog-000000.ibb 0 Gtid 1 0 GTID 0-1-4
binlog-000000.ibb 0 Query 1 0 use `test`; DROP TABLE `t1` /* generated by server */
SHOW BINLOG EVENTS LIMIT 2, 3;
binlog-000000.ibb # Gtid # # GTID #-#-#
binlog-000000.ibb # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (1)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (2)
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (3)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # GTID #-#-#
binlog-000000.ibb # Query # # use `test`; DROP TABLE `t1` /* generated by server */
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
binlog-000000.ibb 0 Gtid 1 0 BEGIN GTID 0-1-2
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (1)
binlog-000000.ibb 0 Xid 1 0 COMMIT /* xid=34 */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (1)
binlog-000000.ibb # Xid # # COMMIT /* XID */
CREATE TABLE t2 (a INT PRIMARY KEY, b VARCHAR(2048)) ENGINE=InnoDB;
SET SESSION binlog_format= ROW;
*** Do 1500 transactions ...

View File

@@ -27,9 +27,14 @@ SELECT @@GLOBAL.binlog_checksum;
# If this gets too annoying to do, we can replace this with something that
# checks that the reported file and position is within some reasonable range
# of the value left by current code.
--replace_column 2 #
SHOW MASTER STATUS;
SHOW BINLOG EVENTS IN "binlog-000000.ibb";
SHOW BINLOG EVENTS LIMIT 2, 3;
--let $binlog_file= binlog-000000.ibb
--let $binlog_start= 0
--source include/show_binlog_events.inc
--let $binlog_file=
--let $binlog_limit= 2, 3
--source include/show_binlog_events.inc
CREATE TABLE t2 (a INT PRIMARY KEY, b VARCHAR(2048)) ENGINE=InnoDB;

View File

@@ -0,0 +1,56 @@
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
# Flush all dirty pages from buffer pool
SET @no_checkpoint_save_pct= @@GLOBAL.innodb_max_dirty_pages_pct;
SET @no_checkpoint_save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm;
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0;
SET GLOBAL innodb_max_dirty_pages_pct=0.0;
SET GLOBAL innodb_max_dirty_pages_pct= @no_checkpoint_save_pct;
SET GLOBAL innodb_max_dirty_pages_pct_lwm= @no_checkpoint_save_pct_lwm;
BEGIN;
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
COMMIT;
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
INSERT INTO t1 VALUES (6);
INSERT INTO t1 VALUES (7);
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
6
7
SET SESSION debug_dbug="+d,crash_dispatch_command_before";
SELECT 1;
Got one of the listed errors
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
binlog-000000.ibb # Gtid # # GTID #-#-#
binlog-000000.ibb # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (1)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (2)
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (3)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (4)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (5)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (6)
binlog-000000.ibb # Xid # # COMMIT /* XID */
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (7)
binlog-000000.ibb # Xid # # COMMIT /* XID */
DROP TABLE t1;

View File

@@ -14,7 +14,6 @@ INSERT INTO t1 VALUES (1);
--let $no_checkpoint_flush= 1
--let $no_checkpoint_kill= 1
--source ../../suite/innodb/include/no_checkpoint_start.inc
SHOW MASTER STATUS;
--let $file= query_get_value(SHOW MASTER STATUS, File, 1)
--let $pos= query_get_value(SHOW MASTER STATUS, Position, 1)
@@ -23,8 +22,11 @@ BEGIN;
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
COMMIT;
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
INSERT INTO t1 VALUES (6);
INSERT INTO t1 VALUES (7);
SELECT * FROM t1 ORDER BY a;
DROP TABLE t1;
# Crash the server
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
@@ -55,5 +57,7 @@ EOF
--enable_reconnect
--source include/wait_until_connected_again.inc
SHOW MASTER STATUS;
SHOW BINLOG EVENTS;
--let $binlog_file=
--let $binlog_start= 0
--source include/show_binlog_events.inc
DROP TABLE t1;

View File

@@ -4819,11 +4819,11 @@ MYSQL_BIN_LOG::reset_engine_binlogs(THD *thd, rpl_gtid *init_state,
mysql_mutex_lock(&LOCK_log);
mysql_mutex_lock(&LOCK_index);
err= (*opt_binlog_engine_hton->reset_binlogs)();
if (init_state)
rpl_global_gtid_binlog_state.load(init_state, init_state_len);
else
rpl_global_gtid_binlog_state.reset();
err= (*opt_binlog_engine_hton->reset_binlogs)();
mysql_mutex_unlock(&LOCK_index);
mysql_mutex_unlock(&LOCK_log);

View File

@@ -618,12 +618,8 @@ fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
page_offset= 0;
page->flushed_clean= false;
}
mtr->write_binlog(LOG_BINLOG_ID_0 + (file_no & 1), page_no,
(uint16_t)page_offset, page_offset + &page->page_buf[0],
len);
sql_print_information("ToDo2: %d, page=%u, off=%u, len=%u)", (int)(file_no & 1), page_no, page_offset, len);
for (uint32_t i= page_offset; i < page_offset+len; i+=8)
sql_print_information("ToDo2: 0x%04x %02X %02X %02X %02X %02X %02X %02X %02X", i, page->page_buf[i], page->page_buf[i+1], page->page_buf[i+2], page->page_buf[i+3], page->page_buf[i+4], page->page_buf[i+5], page->page_buf[i+6], page->page_buf[i+7]);
mtr->write_binlog((file_no & 1), page_no, (uint16_t)page_offset,
page_offset + &page->page_buf[0], len);
}
/*
@@ -809,14 +805,6 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
and available; binlog tablespace N is active while (N+1) is being
pre-allocated. Only under extreme I/O pressure should be need to
stall here.
ToDo: Handle recovery. Idea: write the current LSN at the start of
the binlog tablespace when we create it. At recovery, we should open
the (at most) 2 most recent binlog tablespaces. Whenever we have a
redo record, skip it if its LSN is smaller than the one stored in the
tablespace corresponding to its space_id. This way, it should be safe
to re-use tablespace ids between just two, SRV_SPACE_ID_BINLOG0 and
SRV_SPACE_ID_BINLOG1.
*/
ut_ad(!pending_prev_end_offset);
pending_prev_end_offset= page_no << page_size_shift;
@@ -981,21 +969,6 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
}
/*
Empty chunk data, used to pass a dummy record to fsp_binlog_write_rec()
in fsp_binlog_flush().
*/
struct chunk_data_flush : public chunk_data_base {
~chunk_data_flush() { }
virtual std::pair<uint32_t, bool> copy_data(byte *p, uint32_t max_len) final
{
memset(p, 0xff, max_len);
return {max_len, true};
}
};
/*
Implementation of FLUSH BINARY LOGS.
Truncate the current binlog tablespace, fill up the last page with dummy data
@@ -1080,6 +1053,7 @@ fsp_binlog_flush()
mtr.start();
fsp_binlog_write_rec(&dummy_data, &mtr, FSP_BINLOG_TYPE_FILLER);
mtr.commit();
log_buffer_flush_to_disk(srv_flush_log_at_trx_commit & 1);
return false;
}

View File

@@ -408,10 +408,635 @@ struct found_binlogs {
};
/*
This structure holds the state needed during InnoDB recovery for recovering
binlog tablespace files.
Redo records are assembled page by page in page_buf and written out to the
binlog file identified by cur_file_no as each page is completed.
*/
class binlog_recovery {
public:
/* Result of scan_for_binlogs() on binlog_dir, filled in by init_recovery(). */
struct found_binlogs scan_result;
/*
Buffer of srv_page_size bytes, allocated in init_recovery() and freed in
release(). Holds the page currently being assembled; also used as scratch
space by get_header() and zero_out_cur_file().
*/
byte *page_buf;
/* Binlog directory: opt_binlog_directory, or "." if that is unset or empty. */
const char *binlog_dir;
/*
The current file number being recovered.
This starts out as the most recent existing non-empty binlog that has a
starting LSN no bigger than the recovery starting LSN. This should always be
one of the two most recent binlog files found at startup.
*/
uint64_t cur_file_no;
/* The physical length of cur_file_no file. */
uint64_t cur_phys_size;
/*
The starting LSN (as stored in the header of the binlog tablespace file).
No redo prior to this LSN should be applied to this file.
*/
lsn_t start_file_lsn;
/* Open file for cur_file_no, or -1 if not open. */
File cur_file_fh;
/* The sofar position of redo in cur_file_no (end point of previous redo). */
uint32_t cur_page_no;
uint32_t cur_page_offset;
/* The path to cur_file_no. */
char full_path[OS_FILE_MAX_PATH];
/* Set by init_recovery() once page_buf is allocated; cleared by release(). */
bool inited;
/*
Flag set in case of severe error and --innodb-force_recovery to completely
skip any binlog recovery.
*/
bool skip_recovery;
/*
Special case, if we start from completely empty (no non-empty binlog files).
This should recover into an empty binlog state.
*/
bool start_empty;
/*
Special case: The last two files are empty. Then we ignore the last empty
file and use the 2 previous files instead. The ignored file is deleted only
after successful recovery, to try to avoid destroying data in case of
recovery problems.
*/
bool ignore_last;
/*
Mark the case where the first binlog tablespace file we need to consider for
recovery has file LSN that is later than the first redo record; in this case
we need to skip records until the first one that applies to this file.
*/
bool skipping_early_lsn;
/*
Skip any initial records until the start of a page. We are guaranteed that
any page that needs to be recovered will have recovery data for the whole
page, and this way we never need to read-modify-write pages during recovery.
*/
bool skipping_partial_page;
/*
Set up recovery state from the first binlog redo record seen.
Returns true on error, false on success.
*/
bool init_recovery(bool space_id, uint32_t page_no, uint16_t offset,
lsn_t start_lsn, lsn_t lsn,
const byte *buf, size_t size) noexcept;
/*
Apply one binlog redo record to the page being assembled, moving on to the
next page or file as needed. Returns true on error, false otherwise
(including when the record is skipped).
*/
bool apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
lsn_t start_lsn, lsn_t lsn,
const byte *buf, size_t size) noexcept;
/* Read the start LSN and empty-status from the first page of a binlog file. */
int get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty) noexcept;
/* Helpers for init_recovery(), one for each possible starting state. */
bool init_recovery_from(uint64_t file_no, lsn_t file_lsn, uint32_t page_no,
uint16_t offset, lsn_t lsn,
const byte *buf, size_t size) noexcept;
void init_recovery_empty() noexcept;
void init_recovery_skip_all() noexcept;
/*
Finish recovery: on success write out pending data and delete binlog files
after the last recovered one; always calls release().
*/
void end_actions(bool recovery_successful) noexcept;
/* Close cur_file_fh if open and free page_buf. */
void release() noexcept;
/* Low-level file and page handling; the bool-returning ones return true on error. */
bool open_cur_file() noexcept;
bool flush_page() noexcept;
/* NOTE(review): the only member function not declared noexcept. */
void zero_out_cur_file();
bool close_file() noexcept;
bool next_file() noexcept;
bool next_page() noexcept;
/* Copy record data into page_buf at offset and advance cur_page_offset. */
void update_page_from_record(uint16_t offset,
const byte *buf, size_t size) noexcept;
};
static binlog_recovery recover_obj;
static void innodb_binlog_prealloc_thread();
static int scan_for_binlogs(const char *binlog_dir, found_binlogs *binlog_files,
bool error_if_missing) noexcept;
static int innodb_binlog_discover();
static bool binlog_state_recover();
static void innodb_binlog_autopurge(uint64_t first_open_file_no);
static int read_gtid_state_from_page(rpl_binlog_state_base *state,
const byte *page, uint32_t page_no,
binlog_header_data *out_header_data);
/*
Read the header of a binlog tablespace file identified by file_no.
The first page of the file is read into page_buf, overwriting its contents.
Sets out_empty to true and out_lsn to 0 if the file is missing, cannot be
opened or read, yields a zero-length read, or if
read_gtid_state_from_page() does not report a non-empty header for it.
Else sets out_empty to false and sets out_lsn from the header.
Returns:
-1 error
0 File is missing (ENOENT); also returned for a zero-length read or when
read_gtid_state_from_page() returns 0.
1 File found (but may be empty according to out_empty).
*/
int
binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty)
noexcept
{
/* Local path buffer; deliberately does not touch the full_path member. */
char full_path[OS_FILE_MAX_PATH];
rpl_binlog_state_base dummy_state;
binlog_header_data header;
/* Pessimistic defaults, only overridden once a non-empty header is seen. */
out_empty= true;
out_lsn= 0;
binlog_name_make(full_path, file_no, binlog_dir);
File fh= my_open(full_path, O_RDONLY | O_BINARY, MYF(0));
if (fh < (File)0)
return (my_errno == ENOENT ? 0 : -1);
size_t read= my_pread(fh, page_buf, srv_page_size, 0, MYF(0));
my_close(fh, MYF(0));
if (UNIV_UNLIKELY(read == (size_t)-1))
return -1;
if (read == 0)
return 0;
/*
NOTE(review): a short read (0 < read < srv_page_size) is passed on as-is,
leaving stale bytes in the tail of page_buf; presumably rejected by
read_gtid_state_from_page() - confirm.
*/
dummy_state.init();
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
if (res <= 0)
return res;
if (!header.is_empty)
{
out_empty= false;
out_lsn= header.start_lsn;
}
return 1;
}
bool binlog_recovery::init_recovery(bool space_id, uint32_t page_no,
uint16_t offset,
lsn_t start_lsn, lsn_t end_lsn,
const byte *buf, size_t size) noexcept
{
/* Start by initializing resource pointers so we are safe to releaes(). */
cur_file_fh= (File)-1;
if (!(page_buf= (byte *)ut_malloc(srv_page_size, mem_key_binlog)))
{
my_error(ER_OUTOFMEMORY, MYF(MY_WME), srv_page_size);
return true;
}
memset(page_buf, 0, srv_page_size);
inited= true;
/*
ToDo: It would be good to find a way to not duplicate this logic for
where the binlog tablespace filess are stored with the code in
innodb_binlog_init(). But it's a bit awkward, because InnoDB recovery
runs during plugin init, so not even available for the server to call
into until after recovery is done.
*/
binlog_dir= opt_binlog_directory;
if (!binlog_dir || !binlog_dir[0])
binlog_dir= ".";
if (scan_for_binlogs(binlog_dir, &scan_result, true) <= 0)
return true;
/*
Here we find the two most recent, non-empty binlogs to do recovery on.
Before we allocate binlog tablespace file N+2, we flush and fsync file N
to disk. This ensures that we only ever need to apply redo records to the
two most recent files during recovery.
A special case however arises if the two most recent binlog files are
both completely empty. Then we do not have any LSN to match against to
know if a redo record applies to one of these two files, or to an earlier
file with same value of bit 0 of the file_no. In this case, we ignore the
most recent file (deleting it later after successful recovery), and
consider instead the two prior files, the first of which is guaranteed to
have durably saved a starting LSN to use.
Hence the loop, which can only ever have one or two iterations.
A further special case is if there are fewer than two (or three if last
two are empty) files. If there are no files, or only empty files, then the
server must have stopped just after RESET MASTER (or just after
initializing the binlogs at first startup), and we should just start the
binlogs from scratch.
*/
ignore_last= false;
uint64_t file_no2= scan_result.last_file_no;
uint64_t file_no1= scan_result.prev_file_no;
int num_binlogs= scan_result.found_binlogs;
for (;;)
{
lsn_t lsn1= 0, lsn2= 0;
bool is_empty1= true, is_empty2= true;
int res2= get_header(file_no2, lsn2, is_empty2);
if (num_binlogs == 0 ||
(num_binlogs == 1 && is_empty2))
{
init_recovery_empty();
return false;
}
if (num_binlogs == 1)
return init_recovery_from(file_no2 + (space_id != (file_no2 & 1)), lsn2,
page_no, offset, start_lsn, buf, size);
int res1= get_header(file_no1, lsn1, is_empty1);
if (res2 < 0 && !srv_force_recovery)
{
sql_print_error("InnoDB: I/O error reading binlog file number " PRIu64,
file_no2);
return true;
}
if (res1 < 0 && !srv_force_recovery)
{
sql_print_error("InnoDB: I/O error reading binlog file number " PRIu64,
file_no1);
return true;
}
if (is_empty1 && is_empty2)
{
if (!ignore_last)
{
ignore_last= true;
if (file_no2 > scan_result.earliest_file_no)
{
--file_no2;
if (file_no1 > scan_result.earliest_file_no)
--file_no1;
else
--num_binlogs;
}
else
--num_binlogs;
continue;
}
if (srv_force_recovery)
{
/*
If the last 3 files are empty, we cannot get an LSN to know which
records apply to each file. This should not happen unless there is
damage to the file system. If force recovery is requested, we must
simply do no recovery at all on the binlog files.
*/
sql_print_warning("InnoDB: Binlog tablespace file recovery is not "
"possible. Recovery is skipped due to "
"--innodb-force-recovery");
init_recovery_skip_all();
return false;
}
sql_print_error("InnoDB: Last 3 binlog tablespace files are all empty. "
"Recovery is not possible");
return true;
}
if (is_empty2)
lsn2= lsn1;
if (space_id == (file_no2 & 1) && start_lsn >= lsn1)
{
if (start_lsn < lsn2 && !srv_force_recovery)
{
sql_print_error("InnoDB: inconsistent space_id %d for lsn=%" LSN_PF,
(int)space_id, start_lsn);
return true;
}
return init_recovery_from(file_no2, lsn2,
page_no, offset, start_lsn, buf, size);
}
else
return init_recovery_from(file_no1, lsn1,
page_no, offset, start_lsn, buf, size);
/* NotReached. */
}
}
/*
  Set up the recovery state to start from binlog file file_no, whose header
  start LSN is file_lsn, given the first redo record (page_no, offset, lsn,
  buf, size).

  If the record's LSN is earlier than the file's start LSN, records are
  skipped until one that applies to the file. Otherwise, if the record starts
  at (or before) FIL_PAGE_DATA, it is applied to the page buffer at once; if
  not, records are skipped until the start of a page.

  Always returns false.
*/
bool
binlog_recovery::init_recovery_from(uint64_t file_no, lsn_t file_lsn,
                                    uint32_t page_no, uint16_t offset,
                                    lsn_t lsn, const byte *buf, size_t size)
  noexcept
{
  cur_file_no= file_no;
  cur_phys_size= 0;
  start_file_lsn= file_lsn;
  cur_page_no= page_no;
  cur_page_offset= 0;
  skip_recovery= false;
  start_empty= false;

  skipping_early_lsn= (lsn < start_file_lsn);
  skipping_partial_page= true;
  if (!skipping_early_lsn && offset <= FIL_PAGE_DATA)
  {
    /* The record starts a page, so it can be applied right away. */
    update_page_from_record(offset, buf, size);
    skipping_partial_page= false;
  }
  return false;
}
/*
  Set up the recovery state for the case where no binlog files exist, or all
  existing binlog files are completely empty, so that there is no file LSN to
  compare redo records against.

  This can only happen after a crash immediately following RESET MASTER (or a
  fresh server installation), since an initial file header is durably written
  to disk before any new data is binlogged. Hence _all_ redo records are to be
  skipped, and recovery ends in a completely empty state.
*/
void
binlog_recovery::init_recovery_empty() noexcept
{
  /* Position: start of file 0, no start LSN known. */
  cur_file_no= 0;
  cur_page_no= 0;
  cur_page_offset= 0;
  cur_phys_size= 0;
  start_file_lsn= (lsn_t)0;

  /* Mode flags: start_empty makes apply_redo() ignore every record. */
  start_empty= true;
  skip_recovery= false;
  ignore_last= false;
  skipping_early_lsn= false;
  skipping_partial_page= true;
}
/*
Mark binlog recovery as skipped altogether. With skip_recovery set,
apply_redo() ignores every record and end_actions() only releases resources.
No other state is initialized here.
*/
void
binlog_recovery::init_recovery_skip_all() noexcept
{
skip_recovery= true;
}
/*
Final step of binlog recovery.
If recovery_successful is true and recovery was not skipped: write out any
partially filled last page, restore the size of the last recovered file and
zero its remaining pages, sync and close it, and then delete all binlog files
from the one following the last recovered file up to the last file found by
the directory scan.
In all cases, resources are released with release() at the end.
*/
void
binlog_recovery::end_actions(bool recovery_successful) noexcept
{
/* Local path buffer; the full_path member is left untouched. */
char full_path[OS_FILE_MAX_PATH];
if (recovery_successful && !skip_recovery)
{
if (!start_empty)
{
/*
NOTE(review): the return values of flush_page() and close_file() are
ignored here, so a failed write or sync does not stop the deletion below.
*/
if (cur_page_offset)
flush_page();
if (cur_file_fh > (File)-1)
zero_out_cur_file();
close_file();
/* Deletion starts from the file after the last recovered one. */
++cur_file_no;
}
/*
Delete any binlog tablespace files following the last recovered file.
These files could be pre-allocated but never used files, or they could be
files that were written with data that was eventually not recovered due
to --innodb-flush-log-at-trx-commit=0|2.
*/
for (uint64_t i= cur_file_no;
scan_result.found_binlogs >= 1 && i <= scan_result.last_file_no;
++i)
{
binlog_name_make(full_path, i, binlog_dir);
if (my_delete(full_path, MYF(MY_WME)))
sql_print_warning("InnoDB: Could not delete empty file '%s' ("
"error: %d)", full_path, my_errno);
}
}
release();
}
void
binlog_recovery::release() noexcept
{
if (cur_file_fh >= (File)0)
{
my_close(cur_file_fh, MYF(0));
cur_file_fh= (File)-1;
}
ut_free(page_buf);
page_buf= nullptr;
inited= false;
}
/*
  Open the binlog file cur_file_no for reading and writing, closing any file
  that was open before. On success, cur_phys_size is set from the position
  returned by seeking to the end of the file.

  Returns true if the file could not be opened, false otherwise.
*/
bool
binlog_recovery::open_cur_file() noexcept
{
  /* Drop any handle left over from a previously opened file. */
  if (cur_file_fh >= (File)0)
    my_close(cur_file_fh, MYF(0));

  binlog_name_make(full_path, cur_file_no, binlog_dir);
  cur_file_fh= my_open(full_path, O_RDWR | O_BINARY, MYF(MY_WME));

  const bool opened= cur_file_fh >= (File)0;
  if (opened)
    cur_phys_size= (uint64_t)my_seek(cur_file_fh, 0, MY_SEEK_END, MYF(0));
  return !opened;
}
bool
binlog_recovery::flush_page() noexcept
{
if (cur_file_fh < (File)0 &&
open_cur_file())
return true;
size_t res= my_pwrite(cur_file_fh, page_buf, srv_page_size,
(uint64_t)cur_page_no << srv_page_size_shift,
MYF(MY_WME));
if (res != srv_page_size)
return true;
cur_page_offset= 0;
memset(page_buf, 0, srv_page_size);
return false;
}
void
binlog_recovery::zero_out_cur_file()
{
if (cur_file_fh < (File)0)
return;
/* Recover the original size from the current file. */
size_t read= my_pread(cur_file_fh, page_buf, srv_page_size, 0, MYF(0));
if (read != (size_t)srv_page_size)
{
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
return;
}
binlog_header_data header;
rpl_binlog_state_base dummy_state;
dummy_state.init();
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
if (res <= 0)
{
if (res < 0)
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
else
sql_print_warning("InnoDB: Empty binlog file header found during recovery");
ut_ad(0);
return;
}
/* Fill up or truncate the file to its original size. */
if (my_chsize(cur_file_fh, (my_off_t)header.page_count << srv_page_size_shift,
0, MYF(0)))
sql_print_warning("InnoDB: Could not change the size of last binlog file "
"during recovery (error: %d)", my_errno);
for (uint32_t i= cur_page_no + 1; i < header.page_count; ++i)
{
if (my_pread(cur_file_fh, page_buf, srv_page_size,
(my_off_t)i << srv_page_size_shift, MYF(0)) <
(size_t)srv_page_size)
break;
/* Check if page already zeroed out. */
if (page_buf[0] == 0 && !memcmp(page_buf, page_buf+1, srv_page_size - 1))
continue;
memset(page_buf, 0, srv_page_size);
if (my_pwrite(cur_file_fh, page_buf, srv_page_size,
(uint64_t)i << srv_page_size_shift, MYF(MY_WME)) <
(size_t)srv_page_size)
{
sql_print_warning("InnoDB: Error writing to last binlog file during "
"recovery (error code: %d)", my_errno);
break;
}
}
}
/*
  Sync the current file to disk and close it, if a file is open.

  Returns true if the sync fails (the file is then left open, to be closed
  later by release()), false otherwise.
*/
bool
binlog_recovery::close_file() noexcept
{
  if (cur_file_fh >= (File)0)
  {
    if (my_sync(cur_file_fh, MYF(MY_WME)))
      return true;
    /* The second argument is a flags word, not a file handle. */
    my_close(cur_file_fh, MYF(0));
    cur_file_fh= (File)-1;
    cur_phys_size= 0;
  }
  return false;
}
/*
  Finish the current file and move on to the next one: write out the page
  buffer, sync and close the file, then advance cur_file_no and reset the
  position to the start of the new file.

  Returns true on error (in which case the position is not changed), false
  otherwise.
*/
bool
binlog_recovery::next_file() noexcept
{
  if (flush_page() || close_file())
    return true;

  cur_page_offset= 0;
  cur_page_no= 0;
  ++cur_file_no;
  return false;
}
/*
  Write out the page buffer and advance to the following page of the current
  file. Returns true if the write failed (cur_page_no is then unchanged),
  false otherwise.
*/
bool
binlog_recovery::next_page() noexcept
{
  const bool failed= flush_page();
  if (!failed)
    ++cur_page_no;
  return failed;
}
/*
  Apply one binlog redo record during recovery.

  Records are expected to arrive in order with no gaps; the record is copied
  into the page buffer, after first moving to the next page or the next file
  when the record's space_id / page_no show that the previous one is complete.
  Gaps or unexpected LSNs are errors, unless srv_force_recovery is set.

  Parameters:
    space_id   Bit 0 of the file number that the record belongs to.
    page_no    Page number within the file.
    offset     Offset within the page.
    start_lsn  LSN of the record, checked against start_file_lsn.
    end_lsn    Not used by this function.
    buf, size  The record data.

  Returns true on error, false otherwise (also when the record is skipped).
*/
bool
binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
                            lsn_t start_lsn, lsn_t end_lsn,
                            const byte *buf, size_t size) noexcept
{
  if (UNIV_UNLIKELY(skip_recovery) || start_empty)
    return false;

  if (skipping_partial_page)
  {
    /* Ignore records until one that starts a new page. */
    if (offset > FIL_PAGE_DATA)
      return false;
    skipping_partial_page= false;
  }

  if (start_lsn < start_file_lsn)
  {
    if (skipping_early_lsn)
      return false;  /* Skip record for earlier file that's already durable. */
    if (!srv_force_recovery)
    {
      sql_print_error("InnoDB: Unexpected LSN " LSN_PF " during recovery, "
                      "expected at least " LSN_PF, start_lsn, start_file_lsn);
      return true;
    }
    sql_print_warning("InnoDB: Ignoring unexpected LSN " LSN_PF " during "
                      "recovery, ", start_lsn);
    return false;
  }
  skipping_early_lsn= false;

  /*
    In the messages below, PRIu64 supplies only the length/conversion part of
    the format, so the '%' has to be written explicitly before it.
  */

  /* Test for moving to the next file. */
  if (space_id != (cur_file_no & 1))
  {
    /* Check that we recovered all of this file. */
    if ( ( (cur_page_offset > FIL_PAGE_DATA &&
            cur_page_offset < srv_page_size - FIL_PAGE_DATA_END) ||
           cur_page_no + (cur_page_offset > FIL_PAGE_DATA) <
           cur_phys_size >> srv_page_size_shift) &&
         !srv_force_recovery)
    {
      sql_print_error("InnoDB: Missing recovery record at end of file_no=%"
                      PRIu64 ", LSN " LSN_PF, cur_file_no, start_lsn);
      return true;
    }

    /* Check that we recover from the start of the next file. */
    if ((page_no > 0 || offset > FIL_PAGE_DATA) && !srv_force_recovery)
    {
      sql_print_error("InnoDB: Missing recovery record at start of file_no=%"
                      PRIu64 ", LSN " LSN_PF, cur_file_no+1, start_lsn);
      return true;
    }

    if (next_file())
      return true;
  }
  /* Test for moving to the next page. */
  else if (page_no != cur_page_no)
  {
    if (cur_page_offset < srv_page_size - FIL_PAGE_DATA_END &&
        !srv_force_recovery)
    {
      sql_print_error("InnoDB: Missing recovery record in file_no=%"
                      PRIu64 ", page_no=%u, LSN " LSN_PF,
                      cur_file_no, cur_page_no, start_lsn);
      return true;
    }

    if ((page_no != cur_page_no + 1 || offset > FIL_PAGE_DATA) &&
        !srv_force_recovery)
    {
      sql_print_error("InnoDB: Missing recovery record in file_no=%"
                      PRIu64 ", page_no=%u, LSN " LSN_PF,
                      cur_file_no, cur_page_no + 1, start_lsn);
      return true;
    }

    if (next_page())
      return true;
  }
  /* Test no gaps in offset. */
  else if (offset != cur_page_offset &&
           offset > FIL_PAGE_DATA &&
           !srv_force_recovery)
  {
    sql_print_error("InnoDB: Missing recovery record in file_no=%"
                    PRIu64 ", page_no=%u, LSN " LSN_PF,
                    cur_file_no, cur_page_no, start_lsn);
    return true;
  }

  /* Never copy a record that would reach the end of the page buffer. */
  if (offset + size >= srv_page_size)
    return !srv_force_recovery;

  update_page_from_record(offset, buf, size);
  return false;
}
/*
Copy the size bytes of redo record data in buf into the page buffer at
offset, and set cur_page_offset to the end of the copied data.
No bounds check is done here. apply_redo() checks offset + size against
srv_page_size before calling.
NOTE(review): init_recovery_from() calls this without such a check.
*/
void
binlog_recovery::update_page_from_record(uint16_t offset,
const byte *buf, size_t size) noexcept
{
memcpy(page_buf + offset, buf, size);
cur_page_offset= offset + (uint32_t)size;
}
/*
@@ -489,6 +1114,29 @@ start_binlog_prealloc_thread()
}
/*
Write the initial header record to the file and durably sync it to disk in
the binlog tablespace file and in the redo log.
This is to ensure recovery can work correctly. This way, recovery will
always find a non-empty file with an initial lsn to start recovery from.
Except in the case where we crash right here; in this case recovery will
find no binlog files at all and will know to recover to the empty state
with no binlog files present.
*/
static void
binlog_sync_initial()
{
/* A filler record is written in its own mini-transaction. */
chunk_data_flush dummy_data;
mtr_t mtr;
mtr.start();
fsp_binlog_write_rec(&dummy_data, &mtr, FSP_BINLOG_TYPE_FILLER);
mtr.commit();
/* First make the redo log durable, then flush the binlog page(s). */
log_buffer_flush_to_disk(true);
/* Presumably flushes file_no 0 up to page 0 - confirm against flush_up_to(). */
binlog_page_fifo->flush_up_to(0, 0);
}
/*
Open the InnoDB binlog implementation.
This is called from server binlog layer if the user configured the binlog to
@@ -539,6 +1187,7 @@ innodb_binlog_init(size_t binlog_size, const char *directory)
}
start_binlog_prealloc_thread();
binlog_sync_initial();
return false;
}
@@ -579,6 +1228,42 @@ process_binlog_name(found_binlogs *bls, uint64_t idx, size_t size)
}
/*
Scan the binlog directory for binlog files.
Returns:
1 Success
0 Binlog directory not found
-1 Other error
*/
static int
scan_for_binlogs(const char *binlog_dir, found_binlogs *binlog_files,
bool error_if_missing) noexcept
{
MY_DIR *dir= my_dir(binlog_dir, MYF(MY_WANT_STAT));
if (!dir)
{
if (my_errno != ENOENT || error_if_missing)
sql_print_error("Could not read the binlog directory '%s', error code %d",
binlog_dir, my_errno);
return (my_errno == ENOENT ? 0 : -1);
}
binlog_files->found_binlogs= 0;
size_t num_entries= dir->number_of_files;
fileinfo *entries= dir->dir_entry;
for (size_t i= 0; i < num_entries; ++i) {
const char *name= entries[i].name;
uint64_t idx;
if (!is_binlog_name(name, &idx))
continue;
process_binlog_name(binlog_files, idx, entries[i].mystat->st_size);
}
my_dirend(dir);
return 1; /* Success */
}
static bool
binlog_page_empty(const byte *page)
{
@@ -715,28 +1400,11 @@ innodb_binlog_discover()
uint64_t file_no;
const uint32_t page_size= (uint32_t)srv_page_size;
const uint32_t page_size_shift= (uint32_t)srv_page_size_shift;
MY_DIR *dir= my_dir(innodb_binlog_directory, MYF(MY_WANT_STAT));
if (!dir)
{
if (my_errno == ENOENT)
return 0;
sql_print_error("Could not read the binlog directory '%s', error code %d",
innodb_binlog_directory, my_errno);
return -1;
}
struct found_binlogs UNINIT_VAR(binlog_files);
binlog_files.found_binlogs= 0;
size_t num_entries= dir->number_of_files;
fileinfo *entries= dir->dir_entry;
for (size_t i= 0; i < num_entries; ++i) {
const char *name= entries[i].name;
uint64_t idx;
if (!is_binlog_name(name, &idx))
continue;
process_binlog_name(&binlog_files, idx, entries[i].mystat->st_size);
}
my_dirend(dir);
int res= scan_for_binlogs(innodb_binlog_directory, &binlog_files, false);
if (res <= 0)
return res;
/*
Now, if we found any binlog files, locate the point in one of them where
@@ -752,9 +1420,9 @@ innodb_binlog_discover()
earliest_binlog_file_no= binlog_files.earliest_file_no;
total_binlog_used_size= binlog_files.total_size;
int res= find_pos_in_binlog(binlog_files.last_file_no,
binlog_files.last_size,
page_buf.get(), &page_no, &pos_in_page);
res= find_pos_in_binlog(binlog_files.last_file_no,
binlog_files.last_size,
page_buf.get(), &page_no, &pos_in_page);
if (res < 0) {
file_no= binlog_files.last_file_no;
active_binlog_file_no.store(file_no, std::memory_order_release);
@@ -944,17 +1612,19 @@ innodb_binlog_prealloc_thread()
__attribute__((noinline))
static ssize_t
serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size,
uint32_t file_size_in_pages, bool is_first_page)
uint32_t file_size_in_pages, uint64_t file_no,
bool is_first_page)
{
unsigned char *p= (unsigned char *)buf;
/*
1 uint64_t for the current LSN at start of binlog file.
1 uint32_t for the file length in pages.
1 uint64_t for the file_no.
1 uint32_t for the file size in pages.
1 uint32_t for the innodb_binlog_state_interval in pages.
1 uint64_t for the number of entries in the state stored.
2 uint32_t + 1 uint64_t for at least one GTID.
*/
ut_ad(buf_size >= 4*COMPR_INT_MAX32 + 2*COMPR_INT_MAX64);
ut_ad(buf_size >= 4*COMPR_INT_MAX32 + 4*COMPR_INT_MAX64);
if (is_first_page) {
/*
In the first page where we put the full state, include the value of the
@@ -962,10 +1632,11 @@ serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size,
we know how to search them independent of how the setting changes.
We also include the current LSN for recovery purposes; and the file
length, which is also useful if we have to recover the whole file from
the redo log after a crash.
length and file_no, which is also useful if we have to recover the whole
file from the redo log after a crash.
*/
p= compr_int_write(p, log_sys.get_lsn(std::memory_order_acquire));
p= compr_int_write(p, file_no);
p= compr_int_write(p, file_size_in_pages);
/* ToDo: Check that this current_binlog_state_interval is the correct value! */
p= compr_int_write(p, current_binlog_state_interval);
@@ -1005,7 +1676,8 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
block= nullptr;
ssize_t used_bytes= serialize_gtid_state(state, small_buf, sizeof(small_buf),
file_size_in_pages, page_no==0);
file_size_in_pages, file_no,
page_no==0);
if (used_bytes >= 0)
{
buf= small_buf;
@@ -1019,8 +1691,8 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
if (UNIV_UNLIKELY(!alloced_buf))
return true;
buf= alloced_buf;
used_bytes= serialize_gtid_state(state, buf, buf_size,
file_size_in_pages, page_no==0);
used_bytes= serialize_gtid_state(state, buf, buf_size, file_size_in_pages,
file_no, page_no==0);
if (UNIV_UNLIKELY(used_bytes < 0))
{
ut_ad(0 /* Shouldn't happen, as we allocated maximum needed size. */);
@@ -1110,7 +1782,11 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
const byte *p= page + FIL_PAGE_DATA;
byte t= *p;
if (UNIV_UNLIKELY((t & FSP_BINLOG_TYPE_MASK) != FSP_BINLOG_TYPE_GTID_STATE))
{
out_header_data->is_empty= binlog_page_empty(page);
return 0;
}
out_header_data->is_empty= false;
/* ToDo: Handle reading a state that spans multiple pages. For now, we assume the state fits in a single page. */
ut_a(t & FSP_BINLOG_FLAG_LAST);
@@ -1123,15 +1799,21 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
if (page_no == 0)
{
/*
The state in the first page has three extra words: The start LSN of the
file; length of the file in pages; and the offset between differential
binlog states logged regularly in the binlog tablespace.
The state in the first page has four extra words: The start LSN of the
file; the file_no of the file; the file length, in pages; and the offset
between differential binlog states logged regularly in the binlog
tablespace.
*/
if (UNIV_UNLIKELY(p >= p_end))
return -1;
out_header_data->start_lsn= (uint32_t)v_and_p.first;
v_and_p= compr_int_read(p);
p= v_and_p.second;
if (UNIV_UNLIKELY(p >= p_end))
return -1;
out_header_data->file_no= v_and_p.first;
v_and_p= compr_int_read(p);
p= v_and_p.second;
if (UNIV_UNLIKELY(p >= p_end) || UNIV_UNLIKELY(v_and_p.first >= UINT32_MAX))
return -1;
out_header_data->page_count= (uint32_t)v_and_p.first;
@@ -1146,6 +1828,7 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
else
{
out_header_data->start_lsn= 0;
out_header_data->file_no= ~(uint64_t)0;
out_header_data->page_count= 0;
out_header_data->diff_state_interval= 0;
}
@@ -2298,6 +2981,7 @@ innodb_binlog_get_init_state(rpl_binlog_state_base *out_state)
}
bool
innodb_reset_binlogs()
{
@@ -2308,6 +2992,15 @@ innodb_reset_binlogs()
/* Close existing binlog tablespaces and stop the pre-alloc thread. */
innodb_binlog_close(false);
/*
Durably flush the redo log to disk. This is mostly to simplify
conceptually (RESET MASTER is not performance critical). This way, we will
never see a state where recovery stops at an LSN prior to the RESET
MASTER, so we do not have any question around truncating the binlog to a
point before the RESET MASTER.
*/
log_buffer_flush_to_disk(true);
/* Prevent any flushing activity while resetting. */
binlog_page_fifo->lock_wait_for_idle();
binlog_page_fifo->reset();
@@ -2346,6 +3039,7 @@ innodb_reset_binlogs()
innodb_binlog_init_state();
binlog_page_fifo->unlock();
start_binlog_prealloc_thread();
binlog_sync_initial();
return err;
}
@@ -2384,8 +3078,10 @@ innodb_binlog_purge_low(uint64_t limit_file_no,
bool by_name, uint64_t limit_name_file_no,
uint64_t *out_file_no)
{
uint64_t active= active_binlog_file_no.load(std::memory_order_relaxed);
bool need_active_flush= (active <= limit_file_no + 2);
ut_ad(by_date || by_size || by_name);
ut_a(limit_file_no <= active_binlog_file_no.load(std::memory_order_relaxed));
ut_a(limit_file_no <= active);
ut_a(limit_file_no <= first_open_binlog_file_no);
mysql_mutex_assert_owner(&purge_binlog_mutex);
@@ -2431,6 +3127,19 @@ innodb_binlog_purge_low(uint64_t limit_file_no,
}
else
loc_total_size-= stat_buf.st_size;
/*
Make sure that we always leave at least one binlog file durably non-empty,
by fsync()'ing the first page of the active file before deleting file
(active-2). This way, recovery will always have at least one file header
from which to determine the LSN at which to start applying redo records.
*/
if (file_no + 2 >= active && need_active_flush)
{
binlog_page_fifo->flush_up_to(active, 0);
need_active_flush= false;
}
if (my_delete(filename, MYF(0)))
{
if (my_errno == ENOENT)
@@ -2578,3 +3287,25 @@ innodb_binlog_purge(handler_binlog_purge_info *purge_info)
return res;
}
/*
  Hand one binlog write record found in the redo log over to the binlog
  recovery object.

  The record is routed by the state of recover_obj: while recover_obj.inited
  is false the record goes to init_recovery(); once it is set, records go to
  apply_redo(). (Presumably init_recovery() is what sets the inited flag on
  success - confirm in the recovery object's implementation.)

  All arguments are forwarded unchanged. The return value is whatever the
  selected recover_obj method returns.
*/
bool
binlog_recover_write_data(bool space_id, uint32_t page_no,
                          uint16_t offset,
                          lsn_t start_lsn, lsn_t lsn,
                          const byte *buf, size_t size) noexcept
{
  if (recover_obj.inited)
    return recover_obj.apply_redo(space_id, page_no, offset, start_lsn, lsn,
                                  buf, size);

  /* Recovery state not yet set up; this record initialises it. */
  return recover_obj.init_recovery(space_id, page_no, offset, start_lsn, lsn,
                                   buf, size);
}
/*
  Finish binlog recovery.

  Does nothing unless recover_obj was initialised by an earlier record; in
  that case runs recover_obj.end_actions(true). The lsn argument is
  currently not used by this function.
*/
void
binlog_recover_end(lsn_t lsn) noexcept
{
  if (!recover_obj.inited)
    return;
  recover_obj.end_actions(true);
}

View File

@@ -59,6 +59,21 @@ struct chunk_data_base {
};
/*
  Dummy chunk data carrying no real payload. It exists so that
  fsp_binlog_flush() has a record object to hand to fsp_binlog_write_rec().
*/
struct chunk_data_flush : public chunk_data_base {
  ~chunk_data_flush() { }

  /*
    Fill all of the offered space (max_len bytes starting at p) with 0xff
    bytes and report max_len together with true.
    NOTE(review): the pair presumably means (bytes produced, no more data
    follows) - confirm against the contract in chunk_data_base.
  */
  virtual std::pair<uint32_t, bool> copy_data(byte *p, uint32_t max_len) final
  {
    const uint32_t filled= max_len;
    memset(p, 0xff, filled);
    return std::pair<uint32_t, bool>(filled, true);
  }
};
/*
Data stored at the start of each binlog file.
(The data is stored in the file as compressed integers; this is just a
@@ -72,10 +87,12 @@ struct binlog_header_data {
*/
lsn_t start_lsn;
/*
The length of this binlog file, in pages. Used during recovery to know
what length to create the binlog file with (in the case where we need to
recover the whole file).
The file_no of the binlog file. This is written into the header to be able
to recover it in the case where no binlog files are present at server
start (could be due to FLUSH BINARY LOGS or RESET MASTER).
*/
uint64_t file_no;
/* The length of this binlog file, in pages. */
uint32_t page_count;
/*
The interval (in pages) at which the (differential) binlog GTID state is
@@ -84,6 +101,8 @@ struct binlog_header_data {
binlog file was created.
*/
uint32_t diff_state_interval;
/* Whether the page was found empty. */
bool is_empty;
};
@@ -104,11 +123,19 @@ extern size_t total_binlog_used_size;
static inline void
binlog_name_make(char name_buf[OS_FILE_MAX_PATH], uint64_t file_no)
binlog_name_make(char name_buf[OS_FILE_MAX_PATH], uint64_t file_no,
const char *binlog_dir)
{
snprintf(name_buf, OS_FILE_MAX_PATH,
"%s/" BINLOG_NAME_BASE "%06" PRIu64 BINLOG_NAME_EXT,
innodb_binlog_directory, file_no);
binlog_dir, file_no);
}
/*
  Convenience overload: build the name of binlog file number file_no into
  name_buf, using the directory in innodb_binlog_directory.
*/
static inline void
binlog_name_make(char name_buf[OS_FILE_MAX_PATH], uint64_t file_no)
{
  const char *dir= innodb_binlog_directory;
  binlog_name_make(name_buf, file_no, dir);
}
@@ -125,7 +152,7 @@ extern void innodb_binlog_close(bool shutdown);
extern bool binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
fsp_binlog_page_entry * &block, uint32_t &page_no,
uint32_t &page_offset, uint64_t file_no,
uint32_t file_size_in_pages);
uint32_t file_size_in_pages);
extern bool innodb_binlog_oob(THD *thd, const unsigned char *data,
size_t data_len, void **engine_data);
extern void innodb_free_oob(THD *thd, void *engine_data);
@@ -140,5 +167,10 @@ extern void innodb_binlog_status(char out_filename[FN_REFLEN],
extern bool innodb_binlog_get_init_state(rpl_binlog_state_base *out_state);
extern bool innodb_reset_binlogs();
extern int innodb_binlog_purge(handler_binlog_purge_info *purge_info);
extern bool binlog_recover_write_data(bool space_id, uint32_t page_no,
uint16_t offset,
lsn_t start_lsn, lsn_t lsn,
const byte *buf, size_t size) noexcept;
extern void binlog_recover_end(lsn_t lsn) noexcept;
#endif /* innodb_binlog_h */

View File

@@ -53,6 +53,7 @@ Created 9/20/1997 Heikki Tuuri
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0pagecompress.h"
#include "innodb_binlog.h"
#include "log.h"
/** The recovery system */
@@ -2379,20 +2380,6 @@ void recv_sys_t::rewind(source &l, source &begin) noexcept
pages_it= pages.end();
}
static void binlog_recover_write_data(bool space_id, uint32_t page_no,
uint16_t offset,
lsn_t start_lsn, lsn_t lsn,
const byte *buf, size_t size) noexcept
{
sql_print_information("ToDo1: binlog_recover_write_data(space_id=%d page_no=%u offset=%u start_lsn=%lu lsn=%lu size=%lu)", (int)space_id, (unsigned)page_no, (unsigned)offset, (ulong)start_lsn, (ulong)lsn, (ulong)size);
for (size_t i= offset; i < offset+size; i+=8)
sql_print_information("ToDo1: 0x%04x %02X %02X %02X %02X %02X %02X %02X %02X", i, buf[i], buf[i+1], buf[i+2], buf[i+3], buf[i+4], buf[i+5], buf[i+6], buf[i+7]);
}
static void binlog_recover_end(lsn_t lsn) noexcept
{
sql_print_information("ToDo1: binlog_recover_end(lsn=%lu)]", (ulong)lsn);
}
/** Parse and register one log_t::FORMAT_10_8 mini-transaction.
@tparam storing whether to store the records
@@ -2548,6 +2535,7 @@ restart:
}
ut_ad(!l.is_eof(rlen));
bool is_binlog= false;
uint32_t idlen;
if ((b & 0x80) && got_page_op)
{
@@ -2597,6 +2585,8 @@ restart:
space_id= mlog_decode_varint(l);
if (UNIV_UNLIKELY(space_id == MLOG_DECODE_ERROR))
goto page_id_corrupted;
static_assert((LOG_BINLOG_ID_0 | 1) == LOG_BINLOG_ID_1, "");
is_binlog= storing == YES && (space_id | 1) == LOG_BINLOG_ID_1;
l+= idlen;
rlen-= idlen;
idlen= mlog_decode_varint_length(*l);
@@ -2632,6 +2622,7 @@ restart:
continue;
}
if (storing == YES && UNIV_LIKELY(space_id != TRX_SYS_SPACE) &&
!is_binlog &&
!srv_is_undo_tablespace(space_id))
{
ut_ad(file_checkpoint != 0);
@@ -2781,28 +2772,34 @@ restart:
ignore the payload and only compute the mini-transaction checksum;
there will be a subsequent call with storing==YES. */
continue;
if (storing == NO)
is_binlog= false;
if (UNIV_UNLIKELY(rlen == 0 || last_offset == 1))
goto record_corrupted;
ut_d(const source payload{l});
cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen);
const uint32_t olen= mlog_decode_varint_length(*cl);
if (UNIV_UNLIKELY(olen >= rlen) || UNIV_UNLIKELY(olen > 3))
goto record_corrupted;
const uint32_t offset= mlog_decode_varint(cl);
ut_ad(offset != MLOG_DECODE_ERROR);
static_assert(FIL_PAGE_OFFSET == 4, "compatibility");
if (UNIV_UNLIKELY(offset >= srv_page_size))
goto record_corrupted;
last_offset+= offset;
if (UNIV_UNLIKELY(last_offset < 8 || last_offset >= srv_page_size))
goto record_corrupted;
cl+= olen;
rlen-= olen;
if (!is_binlog)
{
const uint32_t olen= mlog_decode_varint_length(*cl);
if (UNIV_UNLIKELY(olen >= rlen) || UNIV_UNLIKELY(olen > 3))
goto record_corrupted;
const uint32_t offset= mlog_decode_varint(cl);
ut_ad(offset != MLOG_DECODE_ERROR);
static_assert(FIL_PAGE_OFFSET == 4, "compatibility");
if (UNIV_UNLIKELY(offset >= srv_page_size))
goto record_corrupted;
last_offset+= offset;
if (UNIV_UNLIKELY(last_offset < 8 || last_offset >= srv_page_size))
goto record_corrupted;
cl+= olen;
rlen-= olen;
}
if ((b & 0x70) == WRITE)
{
if (UNIV_UNLIKELY(rlen + last_offset > srv_page_size))
if (is_binlog);
else if (UNIV_UNLIKELY(rlen + last_offset > srv_page_size))
goto record_corrupted;
if (UNIV_UNLIKELY(!page_no) && file_checkpoint)
else if (UNIV_UNLIKELY(!page_no) && file_checkpoint)
{
const bool has_size= last_offset <= FSP_HEADER_OFFSET + FSP_SIZE &&
last_offset + rlen >= FSP_HEADER_OFFSET + FSP_SIZE + 4;
@@ -2822,6 +2819,7 @@ restart:
: file_name_t::initial_flags;
if (it == recv_spaces.end())
ut_ad(storing == NO || space_id == TRX_SYS_SPACE ||
is_binlog ||
srv_is_undo_tablespace(space_id));
else if (!it->second.space)
{
@@ -2883,7 +2881,7 @@ restart:
#endif
if (storing == YES)
{
if (space_id >= LOG_BINLOG_ID_0 && space_id <= LOG_BINLOG_ID_1)
if (is_binlog)
{
if ((b & 0xf0) != WRITE)
goto record_corrupted;
@@ -2894,10 +2892,12 @@ restart:
ut_ad(offset != MLOG_DECODE_ERROR);
if (UNIV_UNLIKELY(offset + rlen - olen >= 65535))
goto record_corrupted;
binlog_recover_write_data(space_id & 1, page_no, uint16_t(offset),
start_lsn, lsn,
l.get_buf(cl, recs, decrypt_buf) + olen,
l - recs + rlen - olen);
const size_t head{l - recs + olen};
if (binlog_recover_write_data(space_id & 1, page_no, uint16_t(offset),
start_lsn, lsn,
l.get_buf(cl, recs, decrypt_buf) + head,
rlen - olen))
goto record_corrupted;
continue;
}
if (if_exists)
@@ -4256,6 +4256,9 @@ static bool recv_scan_log(bool last_phase)
ut_ad(recv_sys.file_checkpoint);
recv_sys.lsn= rewound_lsn;
}
else if (store)
binlog_recover_end(recv_sys.lsn);
func_exit:
ut_d(recv_sys.after_apply= last_phase);
mysql_mutex_unlock(&recv_sys.mutex);