mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-34705: Binlog-in-engine: First working recovery
Still needs more testing. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
@@ -16,25 +16,25 @@ SELECT @@GLOBAL.binlog_checksum;
|
||||
NONE
|
||||
SHOW MASTER STATUS;
|
||||
File Position Binlog_Do_DB Binlog_Ignore_DB
|
||||
binlog-000000.ibb 767
|
||||
SHOW BINLOG EVENTS IN "binlog-000000.ibb";
|
||||
binlog-000000.ibb #
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
binlog-000000.ibb 0 Gtid 1 0 GTID 0-1-1
|
||||
binlog-000000.ibb 0 Query 1 0 use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB
|
||||
binlog-000000.ibb 0 Gtid 1 0 BEGIN GTID 0-1-2
|
||||
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (1)
|
||||
binlog-000000.ibb 0 Xid 1 0 COMMIT /* xid=34 */
|
||||
binlog-000000.ibb 0 Gtid 1 0 BEGIN GTID 0-1-3
|
||||
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (2)
|
||||
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (3)
|
||||
binlog-000000.ibb 0 Xid 1 0 COMMIT /* xid=36 */
|
||||
binlog-000000.ibb 0 Gtid 1 0 GTID 0-1-4
|
||||
binlog-000000.ibb 0 Query 1 0 use `test`; DROP TABLE `t1` /* generated by server */
|
||||
SHOW BINLOG EVENTS LIMIT 2, 3;
|
||||
binlog-000000.ibb # Gtid # # GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (1)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (2)
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (3)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; DROP TABLE `t1` /* generated by server */
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
binlog-000000.ibb 0 Gtid 1 0 BEGIN GTID 0-1-2
|
||||
binlog-000000.ibb 0 Query 1 0 use `test`; INSERT INTO t1 VALUES (1)
|
||||
binlog-000000.ibb 0 Xid 1 0 COMMIT /* xid=34 */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (1)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
CREATE TABLE t2 (a INT PRIMARY KEY, b VARCHAR(2048)) ENGINE=InnoDB;
|
||||
SET SESSION binlog_format= ROW;
|
||||
*** Do 1500 transactions ...
|
||||
|
@@ -27,9 +27,14 @@ SELECT @@GLOBAL.binlog_checksum;
|
||||
# If this gets too annoying to do, we can replace this with something that
|
||||
# checks that the reported file and position is within some reasonable range
|
||||
# of the value left by current code.
|
||||
--replace_column 2 #
|
||||
SHOW MASTER STATUS;
|
||||
SHOW BINLOG EVENTS IN "binlog-000000.ibb";
|
||||
SHOW BINLOG EVENTS LIMIT 2, 3;
|
||||
--let $binlog_file= binlog-000000.ibb
|
||||
--let $binlog_start= 0
|
||||
--source include/show_binlog_events.inc
|
||||
--let $binlog_file=
|
||||
--let $binlog_limit= 2, 3
|
||||
--source include/show_binlog_events.inc
|
||||
|
||||
CREATE TABLE t2 (a INT PRIMARY KEY, b VARCHAR(2048)) ENGINE=InnoDB;
|
||||
|
||||
|
56
mysql-test/suite/binlog_in_engine/recovery.result
Normal file
56
mysql-test/suite/binlog_in_engine/recovery.result
Normal file
@@ -0,0 +1,56 @@
|
||||
RESET MASTER;
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
|
||||
INSERT INTO t1 VALUES (1);
|
||||
|
||||
# Flush all dirty pages from buffer pool
|
||||
SET @no_checkpoint_save_pct= @@GLOBAL.innodb_max_dirty_pages_pct;
|
||||
SET @no_checkpoint_save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct=0.0;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct= @no_checkpoint_save_pct;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct_lwm= @no_checkpoint_save_pct_lwm;
|
||||
|
||||
BEGIN;
|
||||
INSERT INTO t1 VALUES (2);
|
||||
INSERT INTO t1 VALUES (3);
|
||||
COMMIT;
|
||||
INSERT INTO t1 VALUES (4);
|
||||
INSERT INTO t1 VALUES (5);
|
||||
INSERT INTO t1 VALUES (6);
|
||||
INSERT INTO t1 VALUES (7);
|
||||
SELECT * FROM t1 ORDER BY a;
|
||||
a
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
SET SESSION debug_dbug="+d,crash_dispatch_command_before";
|
||||
SELECT 1;
|
||||
Got one of the listed errors
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
binlog-000000.ibb # Gtid # # GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (1)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (2)
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (3)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (4)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (5)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (6)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
binlog-000000.ibb # Gtid # # BEGIN GTID #-#-#
|
||||
binlog-000000.ibb # Query # # use `test`; INSERT INTO t1 VALUES (7)
|
||||
binlog-000000.ibb # Xid # # COMMIT /* XID */
|
||||
DROP TABLE t1;
|
@@ -14,7 +14,6 @@ INSERT INTO t1 VALUES (1);
|
||||
--let $no_checkpoint_flush= 1
|
||||
--let $no_checkpoint_kill= 1
|
||||
--source ../../suite/innodb/include/no_checkpoint_start.inc
|
||||
SHOW MASTER STATUS;
|
||||
--let $file= query_get_value(SHOW MASTER STATUS, File, 1)
|
||||
--let $pos= query_get_value(SHOW MASTER STATUS, Position, 1)
|
||||
|
||||
@@ -23,8 +22,11 @@ BEGIN;
|
||||
INSERT INTO t1 VALUES (2);
|
||||
INSERT INTO t1 VALUES (3);
|
||||
COMMIT;
|
||||
INSERT INTO t1 VALUES (4);
|
||||
INSERT INTO t1 VALUES (5);
|
||||
INSERT INTO t1 VALUES (6);
|
||||
INSERT INTO t1 VALUES (7);
|
||||
SELECT * FROM t1 ORDER BY a;
|
||||
DROP TABLE t1;
|
||||
|
||||
# Crash the server
|
||||
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
|
||||
@@ -55,5 +57,7 @@ EOF
|
||||
--enable_reconnect
|
||||
--source include/wait_until_connected_again.inc
|
||||
|
||||
SHOW MASTER STATUS;
|
||||
SHOW BINLOG EVENTS;
|
||||
--let $binlog_file=
|
||||
--let $binlog_start= 0
|
||||
--source include/show_binlog_events.inc
|
||||
DROP TABLE t1;
|
||||
|
@@ -4819,11 +4819,11 @@ MYSQL_BIN_LOG::reset_engine_binlogs(THD *thd, rpl_gtid *init_state,
|
||||
mysql_mutex_lock(&LOCK_log);
|
||||
mysql_mutex_lock(&LOCK_index);
|
||||
|
||||
err= (*opt_binlog_engine_hton->reset_binlogs)();
|
||||
if (init_state)
|
||||
rpl_global_gtid_binlog_state.load(init_state, init_state_len);
|
||||
else
|
||||
rpl_global_gtid_binlog_state.reset();
|
||||
err= (*opt_binlog_engine_hton->reset_binlogs)();
|
||||
|
||||
mysql_mutex_unlock(&LOCK_index);
|
||||
mysql_mutex_unlock(&LOCK_log);
|
||||
|
@@ -618,12 +618,8 @@ fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
|
||||
page_offset= 0;
|
||||
page->flushed_clean= false;
|
||||
}
|
||||
mtr->write_binlog(LOG_BINLOG_ID_0 + (file_no & 1), page_no,
|
||||
(uint16_t)page_offset, page_offset + &page->page_buf[0],
|
||||
len);
|
||||
sql_print_information("ToDo2: %d, page=%u, off=%u, len=%u)", (int)(file_no & 1), page_no, page_offset, len);
|
||||
for (uint32_t i= page_offset; i < page_offset+len; i+=8)
|
||||
sql_print_information("ToDo2: 0x%04x %02X %02X %02X %02X %02X %02X %02X %02X", i, page->page_buf[i], page->page_buf[i+1], page->page_buf[i+2], page->page_buf[i+3], page->page_buf[i+4], page->page_buf[i+5], page->page_buf[i+6], page->page_buf[i+7]);
|
||||
mtr->write_binlog((file_no & 1), page_no, (uint16_t)page_offset,
|
||||
page_offset + &page->page_buf[0], len);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -809,14 +805,6 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
|
||||
and available; binlog tablespace N is active while (N+1) is being
|
||||
pre-allocated. Only under extreme I/O pressure should be need to
|
||||
stall here.
|
||||
|
||||
ToDo: Handle recovery. Idea: write the current LSN at the start of
|
||||
the binlog tablespace when we create it. At recovery, we should open
|
||||
the (at most) 2 most recent binlog tablespaces. Whenever we have a
|
||||
redo record, skip it if its LSN is smaller than the one stored in the
|
||||
tablespace corresponding to its space_id. This way, it should be safe
|
||||
to re-use tablespace ids between just two, SRV_SPACE_ID_BINLOG0 and
|
||||
SRV_SPACE_ID_BINLOG1.
|
||||
*/
|
||||
ut_ad(!pending_prev_end_offset);
|
||||
pending_prev_end_offset= page_no << page_size_shift;
|
||||
@@ -981,21 +969,6 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Empty chunk data, used to pass a dummy record to fsp_binlog_write_rec()
|
||||
in fsp_binlog_flush().
|
||||
*/
|
||||
struct chunk_data_flush : public chunk_data_base {
|
||||
~chunk_data_flush() { }
|
||||
|
||||
virtual std::pair<uint32_t, bool> copy_data(byte *p, uint32_t max_len) final
|
||||
{
|
||||
memset(p, 0xff, max_len);
|
||||
return {max_len, true};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Implementation of FLUSH BINARY LOGS.
|
||||
Truncate the current binlog tablespace, fill up the last page with dummy data
|
||||
@@ -1080,6 +1053,7 @@ fsp_binlog_flush()
|
||||
mtr.start();
|
||||
fsp_binlog_write_rec(&dummy_data, &mtr, FSP_BINLOG_TYPE_FILLER);
|
||||
mtr.commit();
|
||||
log_buffer_flush_to_disk(srv_flush_log_at_trx_commit & 1);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@@ -408,10 +408,635 @@ struct found_binlogs {
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
This structure holds the state needed during InnoDB recovery for recovering
|
||||
binlog tablespace files.
|
||||
*/
|
||||
class binlog_recovery {
|
||||
public:
|
||||
struct found_binlogs scan_result;
|
||||
byte *page_buf;
|
||||
const char *binlog_dir;
|
||||
/*
|
||||
The current file number being recovered.
|
||||
This starts out as the most recent existing non-empty binlog that has a
|
||||
starting LSN no bigger than the recovery starting LSN. This should always be
|
||||
one of the two most recent binlog files found at startup.
|
||||
*/
|
||||
uint64_t cur_file_no;
|
||||
/* The physical length of cur_file_no file. */
|
||||
uint64_t cur_phys_size;
|
||||
/*
|
||||
The starting LSN (as stored in the header of the binlog tablespace file).
|
||||
No redo prior to this LSN should be applied to this file.
|
||||
*/
|
||||
lsn_t start_file_lsn;
|
||||
/* Open file for cur_file_no, or -1 if not open. */
|
||||
File cur_file_fh;
|
||||
/* The sofar position of redo in cur_file_no (end point of previous redo). */
|
||||
uint32_t cur_page_no;
|
||||
uint32_t cur_page_offset;
|
||||
|
||||
/* The path to cur_file_no. */
|
||||
char full_path[OS_FILE_MAX_PATH];
|
||||
|
||||
bool inited;
|
||||
/*
|
||||
Flag set in case of severe error and --innodb-force_recovery to completely
|
||||
skip any binlog recovery.
|
||||
*/
|
||||
bool skip_recovery;
|
||||
/*
|
||||
Special case, if we start from completely empty (no non-empty binlog files).
|
||||
This should recover into an empty binlog state.
|
||||
*/
|
||||
bool start_empty;
|
||||
/*
|
||||
Special case: The last two files are empty. Then we ignore the last empty
|
||||
file and use the 2 previous files instead. The ignored file is deleted only
|
||||
after successful recovery, to try to avoid destroying data in case of
|
||||
recovery problems.
|
||||
*/
|
||||
bool ignore_last;
|
||||
/*
|
||||
Mark the case where the first binlog tablespace file we need to consider for
|
||||
recovery has file LSN that is later than the first redo record; in this case
|
||||
we need to skip records until the first one that applies to this file.
|
||||
*/
|
||||
bool skipping_early_lsn;
|
||||
/*
|
||||
Skip any initial records until the start of a page. We are guaranteed that
|
||||
any page that needs to be recovered will have recovery data for the whole
|
||||
page, and this way we never need to read-modify-write pages during recovery.
|
||||
*/
|
||||
bool skipping_partial_page;
|
||||
|
||||
bool init_recovery(bool space_id, uint32_t page_no, uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t lsn,
|
||||
const byte *buf, size_t size) noexcept;
|
||||
bool apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t lsn,
|
||||
const byte *buf, size_t size) noexcept;
|
||||
int get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty) noexcept;
|
||||
bool init_recovery_from(uint64_t file_no, lsn_t file_lsn, uint32_t page_no,
|
||||
uint16_t offset, lsn_t lsn,
|
||||
const byte *buf, size_t size) noexcept;
|
||||
void init_recovery_empty() noexcept;
|
||||
void init_recovery_skip_all() noexcept;
|
||||
void end_actions(bool recovery_successful) noexcept;
|
||||
void release() noexcept;
|
||||
bool open_cur_file() noexcept;
|
||||
bool flush_page() noexcept;
|
||||
void zero_out_cur_file();
|
||||
bool close_file() noexcept;
|
||||
bool next_file() noexcept;
|
||||
bool next_page() noexcept;
|
||||
void update_page_from_record(uint16_t offset,
|
||||
const byte *buf, size_t size) noexcept;
|
||||
};
|
||||
|
||||
|
||||
static binlog_recovery recover_obj;
|
||||
|
||||
|
||||
static void innodb_binlog_prealloc_thread();
|
||||
static int scan_for_binlogs(const char *binlog_dir, found_binlogs *binlog_files,
|
||||
bool error_if_missing) noexcept;
|
||||
static int innodb_binlog_discover();
|
||||
static bool binlog_state_recover();
|
||||
static void innodb_binlog_autopurge(uint64_t first_open_file_no);
|
||||
static int read_gtid_state_from_page(rpl_binlog_state_base *state,
|
||||
const byte *page, uint32_t page_no,
|
||||
binlog_header_data *out_header_data);
|
||||
|
||||
|
||||
/*
|
||||
Read the header of a binlog tablespace file identified by file_no.
|
||||
Sets out_empty to true (and out_lsn to 0) if the file is empty or has a
checksum error (or is missing).
Else sets out_empty to false and sets out_lsn from the header.
|
||||
|
||||
Returns:
|
||||
-1 error
|
||||
0 File is missing (ENOENT)
|
||||
1 File found (but may be empty according to out_empty).
|
||||
*/
|
||||
int
|
||||
binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty)
|
||||
noexcept
|
||||
{
|
||||
char full_path[OS_FILE_MAX_PATH];
|
||||
rpl_binlog_state_base dummy_state;
|
||||
binlog_header_data header;
|
||||
|
||||
out_empty= true;
|
||||
out_lsn= 0;
|
||||
|
||||
binlog_name_make(full_path, file_no, binlog_dir);
|
||||
File fh= my_open(full_path, O_RDONLY | O_BINARY, MYF(0));
|
||||
if (fh < (File)0)
|
||||
return (my_errno == ENOENT ? 0 : -1);
|
||||
size_t read= my_pread(fh, page_buf, srv_page_size, 0, MYF(0));
|
||||
my_close(fh, MYF(0));
|
||||
if (UNIV_UNLIKELY(read == (size_t)-1))
|
||||
return -1;
|
||||
if (read == 0)
|
||||
return 0;
|
||||
dummy_state.init();
|
||||
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
|
||||
if (res <= 0)
|
||||
return res;
|
||||
if (!header.is_empty)
|
||||
{
|
||||
out_empty= false;
|
||||
out_lsn= header.start_lsn;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
bool binlog_recovery::init_recovery(bool space_id, uint32_t page_no,
|
||||
uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t end_lsn,
|
||||
const byte *buf, size_t size) noexcept
|
||||
{
|
||||
/* Start by initializing resource pointers so we are safe to releaes(). */
|
||||
cur_file_fh= (File)-1;
|
||||
if (!(page_buf= (byte *)ut_malloc(srv_page_size, mem_key_binlog)))
|
||||
{
|
||||
my_error(ER_OUTOFMEMORY, MYF(MY_WME), srv_page_size);
|
||||
return true;
|
||||
}
|
||||
memset(page_buf, 0, srv_page_size);
|
||||
inited= true;
|
||||
/*
|
||||
ToDo: It would be good to find a way to not duplicate this logic for
|
||||
where the binlog tablespace filess are stored with the code in
|
||||
innodb_binlog_init(). But it's a bit awkward, because InnoDB recovery
|
||||
runs during plugin init, so not even available for the server to call
|
||||
into until after recovery is done.
|
||||
*/
|
||||
binlog_dir= opt_binlog_directory;
|
||||
if (!binlog_dir || !binlog_dir[0])
|
||||
binlog_dir= ".";
|
||||
if (scan_for_binlogs(binlog_dir, &scan_result, true) <= 0)
|
||||
return true;
|
||||
|
||||
/*
|
||||
Here we find the two most recent, non-empty binlogs to do recovery on.
|
||||
Before we allocate binlog tablespace file N+2, we flush and fsync file N
|
||||
to disk. This ensures that we only ever need to apply redo records to the
|
||||
two most recent files during recovery.
|
||||
|
||||
A special case however arises if the two most recent binlog files are
|
||||
both completely empty. Then we do not have any LSN to match against to
|
||||
know if a redo record applies to one of these two files, or to an earlier
|
||||
file with same value of bit 0 of the file_no. In this case, we ignore the
|
||||
most recent file (deleting it later after successful recovery), and
|
||||
consider instead the two prior files, the first of which is guaranteed to
|
||||
have durably saved a starting LSN to use.
|
||||
|
||||
Hence the loop, which can only ever have one or two iterations.
|
||||
|
||||
A further special case is if there are fewer than two (or three if last
|
||||
two are empty) files. If there are no files, or only empty files, then the
|
||||
server must have stopped just after RESET MASTER (or just after
|
||||
initializing the binlogs at first startup), and we should just start the
|
||||
binlogs from scratch.
|
||||
*/
|
||||
ignore_last= false;
|
||||
uint64_t file_no2= scan_result.last_file_no;
|
||||
uint64_t file_no1= scan_result.prev_file_no;
|
||||
int num_binlogs= scan_result.found_binlogs;
|
||||
for (;;)
|
||||
{
|
||||
lsn_t lsn1= 0, lsn2= 0;
|
||||
bool is_empty1= true, is_empty2= true;
|
||||
int res2= get_header(file_no2, lsn2, is_empty2);
|
||||
|
||||
if (num_binlogs == 0 ||
|
||||
(num_binlogs == 1 && is_empty2))
|
||||
{
|
||||
init_recovery_empty();
|
||||
return false;
|
||||
}
|
||||
if (num_binlogs == 1)
|
||||
return init_recovery_from(file_no2 + (space_id != (file_no2 & 1)), lsn2,
|
||||
page_no, offset, start_lsn, buf, size);
|
||||
|
||||
int res1= get_header(file_no1, lsn1, is_empty1);
|
||||
|
||||
if (res2 < 0 && !srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: I/O error reading binlog file number " PRIu64,
|
||||
file_no2);
|
||||
return true;
|
||||
}
|
||||
if (res1 < 0 && !srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: I/O error reading binlog file number " PRIu64,
|
||||
file_no1);
|
||||
return true;
|
||||
}
|
||||
if (is_empty1 && is_empty2)
|
||||
{
|
||||
if (!ignore_last)
|
||||
{
|
||||
ignore_last= true;
|
||||
if (file_no2 > scan_result.earliest_file_no)
|
||||
{
|
||||
--file_no2;
|
||||
if (file_no1 > scan_result.earliest_file_no)
|
||||
--file_no1;
|
||||
else
|
||||
--num_binlogs;
|
||||
}
|
||||
else
|
||||
--num_binlogs;
|
||||
continue;
|
||||
}
|
||||
if (srv_force_recovery)
|
||||
{
|
||||
/*
|
||||
If the last 3 files are empty, we cannot get an LSN to know which
|
||||
records apply to each file. This should not happen unless there is
|
||||
damage to the file system. If force recovery is requested, we must
|
||||
simply do no recovery at all on the binlog files.
|
||||
*/
|
||||
sql_print_warning("InnoDB: Binlog tablespace file recovery is not "
|
||||
"possible. Recovery is skipped due to "
|
||||
"--innodb-force-recovery");
|
||||
init_recovery_skip_all();
|
||||
return false;
|
||||
}
|
||||
sql_print_error("InnoDB: Last 3 binlog tablespace files are all empty. "
|
||||
"Recovery is not possible");
|
||||
return true;
|
||||
}
|
||||
if (is_empty2)
|
||||
lsn2= lsn1;
|
||||
if (space_id == (file_no2 & 1) && start_lsn >= lsn1)
|
||||
{
|
||||
if (start_lsn < lsn2 && !srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: inconsistent space_id %d for lsn=%" LSN_PF,
|
||||
(int)space_id, start_lsn);
|
||||
return true;
|
||||
}
|
||||
return init_recovery_from(file_no2, lsn2,
|
||||
page_no, offset, start_lsn, buf, size);
|
||||
}
|
||||
else
|
||||
return init_recovery_from(file_no1, lsn1,
|
||||
page_no, offset, start_lsn, buf, size);
|
||||
/* NotReached. */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::init_recovery_from(uint64_t file_no, lsn_t file_lsn,
|
||||
uint32_t page_no, uint16_t offset,
|
||||
lsn_t lsn, const byte *buf, size_t size)
|
||||
noexcept
|
||||
{
|
||||
cur_file_no= file_no;
|
||||
cur_phys_size= 0;
|
||||
start_file_lsn= file_lsn;
|
||||
cur_page_no= page_no;
|
||||
cur_page_offset= 0;
|
||||
skip_recovery= false;
|
||||
start_empty= false;
|
||||
skipping_partial_page= true;
|
||||
if (lsn < start_file_lsn)
|
||||
skipping_early_lsn= true;
|
||||
else
|
||||
{
|
||||
skipping_early_lsn= false;
|
||||
if (offset <= FIL_PAGE_DATA)
|
||||
{
|
||||
update_page_from_record(offset, buf, size);
|
||||
skipping_partial_page= false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Initialize recovery from the state where there are no binlog files, or only
|
||||
completely empty binlog files. In this case we have no file LSN to compare
|
||||
redo records against.
|
||||
|
||||
This can only happen if we crash immediately after RESET MASTER (or fresh
|
||||
server installation) as an initial file header is durably written to disk
|
||||
before binlogging new data. Therefore we should skip _all_ redo records and
|
||||
recover into a completely empty state.
|
||||
*/
|
||||
void
|
||||
binlog_recovery::init_recovery_empty() noexcept
|
||||
{
|
||||
cur_file_no= 0;
|
||||
cur_phys_size= 0;
|
||||
start_file_lsn= (lsn_t)0;
|
||||
cur_page_no= 0;
|
||||
cur_page_offset= 0;
|
||||
skip_recovery= false;
|
||||
start_empty= true;
|
||||
ignore_last= false;
|
||||
skipping_early_lsn= false;
|
||||
skipping_partial_page= true;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binlog_recovery::init_recovery_skip_all() noexcept
|
||||
{
|
||||
skip_recovery= true;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binlog_recovery::end_actions(bool recovery_successful) noexcept
|
||||
{
|
||||
char full_path[OS_FILE_MAX_PATH];
|
||||
if (recovery_successful && !skip_recovery)
|
||||
{
|
||||
if (!start_empty)
|
||||
{
|
||||
if (cur_page_offset)
|
||||
flush_page();
|
||||
if (cur_file_fh > (File)-1)
|
||||
zero_out_cur_file();
|
||||
close_file();
|
||||
++cur_file_no;
|
||||
}
|
||||
|
||||
/*
|
||||
Delete any binlog tablespace files following the last recovered file.
|
||||
These files could be pre-allocated but never used files, or they could be
|
||||
files that were written with data that was eventually not recovered due
|
||||
to --innodb-flush-log-at-trx-commit=0|2.
|
||||
*/
|
||||
for (uint64_t i= cur_file_no;
|
||||
scan_result.found_binlogs >= 1 && i <= scan_result.last_file_no;
|
||||
++i)
|
||||
{
|
||||
binlog_name_make(full_path, i, binlog_dir);
|
||||
if (my_delete(full_path, MYF(MY_WME)))
|
||||
sql_print_warning("InnoDB: Could not delete empty file '%s' ("
|
||||
"error: %d)", full_path, my_errno);
|
||||
}
|
||||
}
|
||||
release();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binlog_recovery::release() noexcept
|
||||
{
|
||||
if (cur_file_fh >= (File)0)
|
||||
{
|
||||
my_close(cur_file_fh, MYF(0));
|
||||
cur_file_fh= (File)-1;
|
||||
}
|
||||
ut_free(page_buf);
|
||||
page_buf= nullptr;
|
||||
inited= false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::open_cur_file() noexcept
|
||||
{
|
||||
if (cur_file_fh >= (File)0)
|
||||
my_close(cur_file_fh, MYF(0));
|
||||
binlog_name_make(full_path, cur_file_no, binlog_dir);
|
||||
cur_file_fh= my_open(full_path, O_RDWR | O_BINARY, MYF(MY_WME));
|
||||
if (cur_file_fh < (File)0)
|
||||
return true;
|
||||
cur_phys_size= (uint64_t)my_seek(cur_file_fh, 0, MY_SEEK_END, MYF(0));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::flush_page() noexcept
|
||||
{
|
||||
if (cur_file_fh < (File)0 &&
|
||||
open_cur_file())
|
||||
return true;
|
||||
size_t res= my_pwrite(cur_file_fh, page_buf, srv_page_size,
|
||||
(uint64_t)cur_page_no << srv_page_size_shift,
|
||||
MYF(MY_WME));
|
||||
if (res != srv_page_size)
|
||||
return true;
|
||||
cur_page_offset= 0;
|
||||
memset(page_buf, 0, srv_page_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binlog_recovery::zero_out_cur_file()
|
||||
{
|
||||
if (cur_file_fh < (File)0)
|
||||
return;
|
||||
|
||||
/* Recover the original size from the current file. */
|
||||
size_t read= my_pread(cur_file_fh, page_buf, srv_page_size, 0, MYF(0));
|
||||
if (read != (size_t)srv_page_size)
|
||||
{
|
||||
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
|
||||
return;
|
||||
}
|
||||
binlog_header_data header;
|
||||
rpl_binlog_state_base dummy_state;
|
||||
dummy_state.init();
|
||||
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
|
||||
if (res <= 0)
|
||||
{
|
||||
if (res < 0)
|
||||
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
|
||||
else
|
||||
sql_print_warning("InnoDB: Empty binlog file header found during recovery");
|
||||
ut_ad(0);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Fill up or truncate the file to its original size. */
|
||||
if (my_chsize(cur_file_fh, (my_off_t)header.page_count << srv_page_size_shift,
|
||||
0, MYF(0)))
|
||||
sql_print_warning("InnoDB: Could not change the size of last binlog file "
|
||||
"during recovery (error: %d)", my_errno);
|
||||
for (uint32_t i= cur_page_no + 1; i < header.page_count; ++i)
|
||||
{
|
||||
if (my_pread(cur_file_fh, page_buf, srv_page_size,
|
||||
(my_off_t)i << srv_page_size_shift, MYF(0)) <
|
||||
(size_t)srv_page_size)
|
||||
break;
|
||||
/* Check if page already zeroed out. */
|
||||
if (page_buf[0] == 0 && !memcmp(page_buf, page_buf+1, srv_page_size - 1))
|
||||
continue;
|
||||
memset(page_buf, 0, srv_page_size);
|
||||
if (my_pwrite(cur_file_fh, page_buf, srv_page_size,
|
||||
(uint64_t)i << srv_page_size_shift, MYF(MY_WME)) <
|
||||
(size_t)srv_page_size)
|
||||
{
|
||||
sql_print_warning("InnoDB: Error writing to last binlog file during "
|
||||
"recovery (error code: %d)", my_errno);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::close_file() noexcept
|
||||
{
|
||||
if (cur_file_fh >= (File)0)
|
||||
{
|
||||
if (my_sync(cur_file_fh, MYF(MY_WME)))
|
||||
return true;
|
||||
my_close(cur_file_fh, (File)0);
|
||||
cur_file_fh= (File)-1;
|
||||
cur_phys_size= 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::next_file() noexcept
|
||||
{
|
||||
if (flush_page())
|
||||
return true;
|
||||
if (close_file())
|
||||
return true;
|
||||
++cur_file_no;
|
||||
cur_page_no= 0;
|
||||
cur_page_offset= 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::next_page() noexcept
|
||||
{
|
||||
if (flush_page())
|
||||
return true;
|
||||
++cur_page_no;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t end_lsn,
|
||||
const byte *buf, size_t size) noexcept
|
||||
{
|
||||
if (UNIV_UNLIKELY(skip_recovery) || start_empty)
|
||||
return false;
|
||||
|
||||
if (skipping_partial_page)
|
||||
{
|
||||
if (offset > FIL_PAGE_DATA)
|
||||
return false;
|
||||
skipping_partial_page= false;
|
||||
}
|
||||
|
||||
if (start_lsn < start_file_lsn)
|
||||
{
|
||||
if (skipping_early_lsn)
|
||||
return false; /* Skip record for earlier file that's already durable. */
|
||||
if (!srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: Unexpected LSN " LSN_PF " during recovery, "
|
||||
"expected at least " LSN_PF, start_lsn, start_file_lsn);
|
||||
return true;
|
||||
}
|
||||
sql_print_warning("InnoDB: Ignoring unexpected LSN " LSN_PF " during "
|
||||
"recovery, ", start_lsn);
|
||||
return false;
|
||||
}
|
||||
skipping_early_lsn= false;
|
||||
|
||||
/* Test for moving to the next file. */
|
||||
if (space_id != (cur_file_no & 1))
|
||||
{
|
||||
/* Check that we recovered all of this file. */
|
||||
if ( ( (cur_page_offset > FIL_PAGE_DATA &&
|
||||
cur_page_offset < srv_page_size - FIL_PAGE_DATA_END) ||
|
||||
cur_page_no + (cur_page_offset > FIL_PAGE_DATA) <
|
||||
cur_phys_size >> srv_page_size_shift) &&
|
||||
!srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: Missing recovery record at end of file_no="
|
||||
PRIu64 ", LSN " LSN_PF, cur_file_no, start_lsn);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check that we recover from the start of the next file. */
|
||||
if ((page_no > 0 || offset > FIL_PAGE_DATA) && !srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: Missing recovery record at start of file_no="
|
||||
PRIu64 ", LSN " LSN_PF, cur_file_no+1, start_lsn);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (next_file())
|
||||
return true;
|
||||
}
|
||||
/* Test for moving to the next page. */
|
||||
else if (page_no != cur_page_no)
|
||||
{
|
||||
if (cur_page_offset < srv_page_size - FIL_PAGE_DATA_END &&
|
||||
!srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: Missing recovery record in file_no="
|
||||
PRIu64 ", page_no=%u, LSN " LSN_PF,
|
||||
cur_file_no, cur_page_no, start_lsn);
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((page_no != cur_page_no + 1 || offset > FIL_PAGE_DATA) &&
|
||||
!srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: Missing recovery record in file_no="
|
||||
PRIu64 ", page_no=%u, LSN " LSN_PF,
|
||||
cur_file_no, cur_page_no + 1, start_lsn);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (next_page())
|
||||
return true;
|
||||
}
|
||||
/* Test no gaps in offset. */
|
||||
else if (offset != cur_page_offset &&
|
||||
offset > FIL_PAGE_DATA &&
|
||||
!srv_force_recovery)
|
||||
{
|
||||
sql_print_error("InnoDB: Missing recovery record in file_no="
|
||||
PRIu64 ", page_no=%u, LSN " LSN_PF,
|
||||
cur_file_no, cur_page_no, start_lsn);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (offset + size >= srv_page_size)
|
||||
return !srv_force_recovery;
|
||||
|
||||
update_page_from_record(offset, buf, size);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binlog_recovery::update_page_from_record(uint16_t offset,
|
||||
const byte *buf, size_t size) noexcept
|
||||
{
|
||||
memcpy(page_buf + offset, buf, size);
|
||||
cur_page_offset= offset + (uint32_t)size;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@@ -489,6 +1114,29 @@ start_binlog_prealloc_thread()
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Write the initial header record to the file and durably sync it to disk in
|
||||
the binlog tablespace file and in the redo log.
|
||||
|
||||
This is to ensure recovery can work correctly. This way, recovery will
|
||||
always find a non-empty file with an initial lsn to start recovery from.
|
||||
Except in the case where we crash right here; in this case recovery will
|
||||
find no binlog files at all and will know to recover to the empty state
|
||||
with no binlog files present.
|
||||
*/
|
||||
static void
|
||||
binlog_sync_initial()
|
||||
{
|
||||
chunk_data_flush dummy_data;
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
fsp_binlog_write_rec(&dummy_data, &mtr, FSP_BINLOG_TYPE_FILLER);
|
||||
mtr.commit();
|
||||
log_buffer_flush_to_disk(true);
|
||||
binlog_page_fifo->flush_up_to(0, 0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Open the InnoDB binlog implementation.
|
||||
This is called from server binlog layer if the user configured the binlog to
|
||||
@@ -539,6 +1187,7 @@ innodb_binlog_init(size_t binlog_size, const char *directory)
|
||||
}
|
||||
|
||||
start_binlog_prealloc_thread();
|
||||
binlog_sync_initial();
|
||||
|
||||
return false;
|
||||
}
|
||||
@@ -579,6 +1228,42 @@ process_binlog_name(found_binlogs *bls, uint64_t idx, size_t size)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Scan the binlog directory for binlog files.
|
||||
Returns:
|
||||
1 Success
|
||||
0 Binlog directory not found
|
||||
-1 Other error
|
||||
*/
|
||||
static int
|
||||
scan_for_binlogs(const char *binlog_dir, found_binlogs *binlog_files,
|
||||
bool error_if_missing) noexcept
|
||||
{
|
||||
MY_DIR *dir= my_dir(binlog_dir, MYF(MY_WANT_STAT));
|
||||
if (!dir)
|
||||
{
|
||||
if (my_errno != ENOENT || error_if_missing)
|
||||
sql_print_error("Could not read the binlog directory '%s', error code %d",
|
||||
binlog_dir, my_errno);
|
||||
return (my_errno == ENOENT ? 0 : -1);
|
||||
}
|
||||
|
||||
binlog_files->found_binlogs= 0;
|
||||
size_t num_entries= dir->number_of_files;
|
||||
fileinfo *entries= dir->dir_entry;
|
||||
for (size_t i= 0; i < num_entries; ++i) {
|
||||
const char *name= entries[i].name;
|
||||
uint64_t idx;
|
||||
if (!is_binlog_name(name, &idx))
|
||||
continue;
|
||||
process_binlog_name(binlog_files, idx, entries[i].mystat->st_size);
|
||||
}
|
||||
my_dirend(dir);
|
||||
|
||||
return 1; /* Success */
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
binlog_page_empty(const byte *page)
|
||||
{
|
||||
@@ -715,28 +1400,11 @@ innodb_binlog_discover()
|
||||
uint64_t file_no;
|
||||
const uint32_t page_size= (uint32_t)srv_page_size;
|
||||
const uint32_t page_size_shift= (uint32_t)srv_page_size_shift;
|
||||
MY_DIR *dir= my_dir(innodb_binlog_directory, MYF(MY_WANT_STAT));
|
||||
if (!dir)
|
||||
{
|
||||
if (my_errno == ENOENT)
|
||||
return 0;
|
||||
sql_print_error("Could not read the binlog directory '%s', error code %d",
|
||||
innodb_binlog_directory, my_errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct found_binlogs UNINIT_VAR(binlog_files);
|
||||
binlog_files.found_binlogs= 0;
|
||||
size_t num_entries= dir->number_of_files;
|
||||
fileinfo *entries= dir->dir_entry;
|
||||
for (size_t i= 0; i < num_entries; ++i) {
|
||||
const char *name= entries[i].name;
|
||||
uint64_t idx;
|
||||
if (!is_binlog_name(name, &idx))
|
||||
continue;
|
||||
process_binlog_name(&binlog_files, idx, entries[i].mystat->st_size);
|
||||
}
|
||||
my_dirend(dir);
|
||||
|
||||
int res= scan_for_binlogs(innodb_binlog_directory, &binlog_files, false);
|
||||
if (res <= 0)
|
||||
return res;
|
||||
|
||||
/*
|
||||
Now, if we found any binlog files, locate the point in one of them where
|
||||
@@ -752,7 +1420,7 @@ innodb_binlog_discover()
|
||||
earliest_binlog_file_no= binlog_files.earliest_file_no;
|
||||
total_binlog_used_size= binlog_files.total_size;
|
||||
|
||||
int res= find_pos_in_binlog(binlog_files.last_file_no,
|
||||
res= find_pos_in_binlog(binlog_files.last_file_no,
|
||||
binlog_files.last_size,
|
||||
page_buf.get(), &page_no, &pos_in_page);
|
||||
if (res < 0) {
|
||||
@@ -944,17 +1612,19 @@ innodb_binlog_prealloc_thread()
|
||||
__attribute__((noinline))
|
||||
static ssize_t
|
||||
serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size,
|
||||
uint32_t file_size_in_pages, bool is_first_page)
|
||||
uint32_t file_size_in_pages, uint64_t file_no,
|
||||
bool is_first_page)
|
||||
{
|
||||
unsigned char *p= (unsigned char *)buf;
|
||||
/*
|
||||
1 uint64_t for the current LSN at start of binlog file.
|
||||
1 uint32_t for the file length in pages.
|
||||
1 uint64_t for the file_no.
|
||||
1 uint32_t for the file size in pages.
|
||||
1 uint32_t for the innodb_binlog_state_interval in pages.
|
||||
1 uint64_t for the number of entries in the state stored.
|
||||
2 uint32_t + 1 uint64_t for at least one GTID.
|
||||
*/
|
||||
ut_ad(buf_size >= 4*COMPR_INT_MAX32 + 2*COMPR_INT_MAX64);
|
||||
ut_ad(buf_size >= 4*COMPR_INT_MAX32 + 4*COMPR_INT_MAX64);
|
||||
if (is_first_page) {
|
||||
/*
|
||||
In the first page where we put the full state, include the value of the
|
||||
@@ -962,10 +1632,11 @@ serialize_gtid_state(rpl_binlog_state_base *state, byte *buf, size_t buf_size,
|
||||
we know how to search them independent of how the setting changes.
|
||||
|
||||
We also include the current LSN for recovery purposes; and the file
|
||||
length, which is also useful if we have to recover the whole file from
|
||||
the redo log after a crash.
|
||||
length and file_no, which is also useful if we have to recover the whole
|
||||
file from the redo log after a crash.
|
||||
*/
|
||||
p= compr_int_write(p, log_sys.get_lsn(std::memory_order_acquire));
|
||||
p= compr_int_write(p, file_no);
|
||||
p= compr_int_write(p, file_size_in_pages);
|
||||
/* ToDo: Check that this current_binlog_state_interval is the correct value! */
|
||||
p= compr_int_write(p, current_binlog_state_interval);
|
||||
@@ -1005,7 +1676,8 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
|
||||
block= nullptr;
|
||||
|
||||
ssize_t used_bytes= serialize_gtid_state(state, small_buf, sizeof(small_buf),
|
||||
file_size_in_pages, page_no==0);
|
||||
file_size_in_pages, file_no,
|
||||
page_no==0);
|
||||
if (used_bytes >= 0)
|
||||
{
|
||||
buf= small_buf;
|
||||
@@ -1019,8 +1691,8 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
|
||||
if (UNIV_UNLIKELY(!alloced_buf))
|
||||
return true;
|
||||
buf= alloced_buf;
|
||||
used_bytes= serialize_gtid_state(state, buf, buf_size,
|
||||
file_size_in_pages, page_no==0);
|
||||
used_bytes= serialize_gtid_state(state, buf, buf_size, file_size_in_pages,
|
||||
file_no, page_no==0);
|
||||
if (UNIV_UNLIKELY(used_bytes < 0))
|
||||
{
|
||||
ut_ad(0 /* Shouldn't happen, as we allocated maximum needed size. */);
|
||||
@@ -1110,7 +1782,11 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
|
||||
const byte *p= page + FIL_PAGE_DATA;
|
||||
byte t= *p;
|
||||
if (UNIV_UNLIKELY((t & FSP_BINLOG_TYPE_MASK) != FSP_BINLOG_TYPE_GTID_STATE))
|
||||
{
|
||||
out_header_data->is_empty= binlog_page_empty(page);
|
||||
return 0;
|
||||
}
|
||||
out_header_data->is_empty= false;
|
||||
/* ToDo: Handle reading a state that spans multiple pages. For now, we assume the state fits in a single page. */
|
||||
ut_a(t & FSP_BINLOG_FLAG_LAST);
|
||||
|
||||
@@ -1123,15 +1799,21 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
|
||||
if (page_no == 0)
|
||||
{
|
||||
/*
|
||||
The state in the first page has three extra words: The start LSN of the
|
||||
file; length of the file in pages; and the offset between differential
|
||||
binlog states logged regularly in the binlog tablespace.
|
||||
The state in the first page has four extra words: The start LSN of the
|
||||
file; the file_no of the file; the file length, in pages; and the offset
|
||||
between differential binlog states logged regularly in the binlog
|
||||
tablespace.
|
||||
*/
|
||||
if (UNIV_UNLIKELY(p >= p_end))
|
||||
return -1;
|
||||
out_header_data->start_lsn= (uint32_t)v_and_p.first;
|
||||
v_and_p= compr_int_read(p);
|
||||
p= v_and_p.second;
|
||||
if (UNIV_UNLIKELY(p >= p_end))
|
||||
return -1;
|
||||
out_header_data->file_no= v_and_p.first;
|
||||
v_and_p= compr_int_read(p);
|
||||
p= v_and_p.second;
|
||||
if (UNIV_UNLIKELY(p >= p_end) || UNIV_UNLIKELY(v_and_p.first >= UINT32_MAX))
|
||||
return -1;
|
||||
out_header_data->page_count= (uint32_t)v_and_p.first;
|
||||
@@ -1146,6 +1828,7 @@ read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
|
||||
else
|
||||
{
|
||||
out_header_data->start_lsn= 0;
|
||||
out_header_data->file_no= ~(uint64_t)0;
|
||||
out_header_data->page_count= 0;
|
||||
out_header_data->diff_state_interval= 0;
|
||||
}
|
||||
@@ -2298,6 +2981,7 @@ innodb_binlog_get_init_state(rpl_binlog_state_base *out_state)
|
||||
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
innodb_reset_binlogs()
|
||||
{
|
||||
@@ -2308,6 +2992,15 @@ innodb_reset_binlogs()
|
||||
/* Close existing binlog tablespaces and stop the pre-alloc thread. */
|
||||
innodb_binlog_close(false);
|
||||
|
||||
/*
|
||||
Durably flush the redo log to disk. This is mostly to simplify
|
||||
conceptually (RESET MASTER is not performance critical). This way, we will
|
||||
never see a state where recovery stops at an LSN prior to the RESET
|
||||
MASTER, so we do not have any question around truncating the binlog to a
|
||||
point before the RESET MASTER.
|
||||
*/
|
||||
log_buffer_flush_to_disk(true);
|
||||
|
||||
/* Prevent any flushing activity while resetting. */
|
||||
binlog_page_fifo->lock_wait_for_idle();
|
||||
binlog_page_fifo->reset();
|
||||
@@ -2346,6 +3039,7 @@ innodb_reset_binlogs()
|
||||
innodb_binlog_init_state();
|
||||
binlog_page_fifo->unlock();
|
||||
start_binlog_prealloc_thread();
|
||||
binlog_sync_initial();
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -2384,8 +3078,10 @@ innodb_binlog_purge_low(uint64_t limit_file_no,
|
||||
bool by_name, uint64_t limit_name_file_no,
|
||||
uint64_t *out_file_no)
|
||||
{
|
||||
uint64_t active= active_binlog_file_no.load(std::memory_order_relaxed);
|
||||
bool need_active_flush= (active <= limit_file_no + 2);
|
||||
ut_ad(by_date || by_size || by_name);
|
||||
ut_a(limit_file_no <= active_binlog_file_no.load(std::memory_order_relaxed));
|
||||
ut_a(limit_file_no <= active);
|
||||
ut_a(limit_file_no <= first_open_binlog_file_no);
|
||||
|
||||
mysql_mutex_assert_owner(&purge_binlog_mutex);
|
||||
@@ -2431,6 +3127,19 @@ innodb_binlog_purge_low(uint64_t limit_file_no,
|
||||
}
|
||||
else
|
||||
loc_total_size-= stat_buf.st_size;
|
||||
|
||||
/*
|
||||
Make sure that we always leave at least one binlog file durably non-empty,
|
||||
by fsync()'ing the first page of the active file before deleting file
|
||||
(active-2). This way, recovery will always have at least one file header
|
||||
from which to determine the LSN at which to start applying redo records.
|
||||
*/
|
||||
if (file_no + 2 >= active && need_active_flush)
|
||||
{
|
||||
binlog_page_fifo->flush_up_to(active, 0);
|
||||
need_active_flush= false;
|
||||
}
|
||||
|
||||
if (my_delete(filename, MYF(0)))
|
||||
{
|
||||
if (my_errno == ENOENT)
|
||||
@@ -2578,3 +3287,25 @@ innodb_binlog_purge(handler_binlog_purge_info *purge_info)
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
binlog_recover_write_data(bool space_id, uint32_t page_no,
|
||||
uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t lsn,
|
||||
const byte *buf, size_t size) noexcept
|
||||
{
|
||||
if (!recover_obj.inited)
|
||||
return recover_obj.init_recovery(space_id, page_no, offset, start_lsn, lsn,
|
||||
buf, size);
|
||||
return recover_obj.apply_redo(space_id, page_no, offset, start_lsn, lsn,
|
||||
buf, size);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binlog_recover_end(lsn_t lsn) noexcept
|
||||
{
|
||||
if (recover_obj.inited)
|
||||
recover_obj.end_actions(true);
|
||||
}
|
||||
|
@@ -59,6 +59,21 @@ struct chunk_data_base {
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Empty chunk data, used to pass a dummy record to fsp_binlog_write_rec()
|
||||
in fsp_binlog_flush().
|
||||
*/
|
||||
struct chunk_data_flush : public chunk_data_base {
|
||||
~chunk_data_flush() { }
|
||||
|
||||
virtual std::pair<uint32_t, bool> copy_data(byte *p, uint32_t max_len) final
|
||||
{
|
||||
memset(p, 0xff, max_len);
|
||||
return {max_len, true};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Data stored at the start of each binlog file.
|
||||
(The data is stored in the file as compressed integers; this is just a
|
||||
@@ -72,10 +87,12 @@ struct binlog_header_data {
|
||||
*/
|
||||
lsn_t start_lsn;
|
||||
/*
|
||||
The length of this binlog file, in pages. Used during recovery to know
|
||||
what length to create the binlog file with (in the case where we need to
|
||||
recover the whole file).
|
||||
The file_no of the binlog file. This is written into the header to be able
|
||||
to recover it in the case where no binlog files are present at server
|
||||
start (could be due to FLUSH BINARY LOGS or RESET MASTER).
|
||||
*/
|
||||
uint64_t file_no;
|
||||
/* The length of this binlog file, in pages. */
|
||||
uint32_t page_count;
|
||||
/*
|
||||
The interval (in pages) at which the (differential) binlog GTID state is
|
||||
@@ -84,6 +101,8 @@ struct binlog_header_data {
|
||||
binlog file was created.
|
||||
*/
|
||||
uint32_t diff_state_interval;
|
||||
/* Whether the page was found empty. */
|
||||
bool is_empty;
|
||||
};
|
||||
|
||||
|
||||
@@ -104,11 +123,19 @@ extern size_t total_binlog_used_size;
|
||||
|
||||
|
||||
static inline void
|
||||
binlog_name_make(char name_buf[OS_FILE_MAX_PATH], uint64_t file_no)
|
||||
binlog_name_make(char name_buf[OS_FILE_MAX_PATH], uint64_t file_no,
|
||||
const char *binlog_dir)
|
||||
{
|
||||
snprintf(name_buf, OS_FILE_MAX_PATH,
|
||||
"%s/" BINLOG_NAME_BASE "%06" PRIu64 BINLOG_NAME_EXT,
|
||||
innodb_binlog_directory, file_no);
|
||||
binlog_dir, file_no);
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
binlog_name_make(char name_buf[OS_FILE_MAX_PATH], uint64_t file_no)
|
||||
{
|
||||
binlog_name_make(name_buf, file_no, innodb_binlog_directory);
|
||||
}
|
||||
|
||||
|
||||
@@ -140,5 +167,10 @@ extern void innodb_binlog_status(char out_filename[FN_REFLEN],
|
||||
extern bool innodb_binlog_get_init_state(rpl_binlog_state_base *out_state);
|
||||
extern bool innodb_reset_binlogs();
|
||||
extern int innodb_binlog_purge(handler_binlog_purge_info *purge_info);
|
||||
extern bool binlog_recover_write_data(bool space_id, uint32_t page_no,
|
||||
uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t lsn,
|
||||
const byte *buf, size_t size) noexcept;
|
||||
extern void binlog_recover_end(lsn_t lsn) noexcept;
|
||||
|
||||
#endif /* innodb_binlog_h */
|
||||
|
@@ -53,6 +53,7 @@ Created 9/20/1997 Heikki Tuuri
|
||||
#include "srv0srv.h"
|
||||
#include "srv0start.h"
|
||||
#include "fil0pagecompress.h"
|
||||
#include "innodb_binlog.h"
|
||||
#include "log.h"
|
||||
|
||||
/** The recovery system */
|
||||
@@ -2379,20 +2380,6 @@ void recv_sys_t::rewind(source &l, source &begin) noexcept
|
||||
pages_it= pages.end();
|
||||
}
|
||||
|
||||
static void binlog_recover_write_data(bool space_id, uint32_t page_no,
|
||||
uint16_t offset,
|
||||
lsn_t start_lsn, lsn_t lsn,
|
||||
const byte *buf, size_t size) noexcept
|
||||
{
|
||||
sql_print_information("ToDo1: binlog_recover_write_data(space_id=%d page_no=%u offset=%u start_lsn=%lu lsn=%lu size=%lu)", (int)space_id, (unsigned)page_no, (unsigned)offset, (ulong)start_lsn, (ulong)lsn, (ulong)size);
|
||||
for (size_t i= offset; i < offset+size; i+=8)
|
||||
sql_print_information("ToDo1: 0x%04x %02X %02X %02X %02X %02X %02X %02X %02X", i, buf[i], buf[i+1], buf[i+2], buf[i+3], buf[i+4], buf[i+5], buf[i+6], buf[i+7]);
|
||||
}
|
||||
static void binlog_recover_end(lsn_t lsn) noexcept
|
||||
{
|
||||
sql_print_information("ToDo1: binlog_recover_end(lsn=%lu)]", (ulong)lsn);
|
||||
}
|
||||
|
||||
|
||||
/** Parse and register one log_t::FORMAT_10_8 mini-transaction.
|
||||
@tparam storing whether to store the records
|
||||
@@ -2548,6 +2535,7 @@ restart:
|
||||
}
|
||||
ut_ad(!l.is_eof(rlen));
|
||||
|
||||
bool is_binlog= false;
|
||||
uint32_t idlen;
|
||||
if ((b & 0x80) && got_page_op)
|
||||
{
|
||||
@@ -2597,6 +2585,8 @@ restart:
|
||||
space_id= mlog_decode_varint(l);
|
||||
if (UNIV_UNLIKELY(space_id == MLOG_DECODE_ERROR))
|
||||
goto page_id_corrupted;
|
||||
static_assert((LOG_BINLOG_ID_0 | 1) == LOG_BINLOG_ID_1, "");
|
||||
is_binlog= storing == YES && (space_id | 1) == LOG_BINLOG_ID_1;
|
||||
l+= idlen;
|
||||
rlen-= idlen;
|
||||
idlen= mlog_decode_varint_length(*l);
|
||||
@@ -2632,6 +2622,7 @@ restart:
|
||||
continue;
|
||||
}
|
||||
if (storing == YES && UNIV_LIKELY(space_id != TRX_SYS_SPACE) &&
|
||||
!is_binlog &&
|
||||
!srv_is_undo_tablespace(space_id))
|
||||
{
|
||||
ut_ad(file_checkpoint != 0);
|
||||
@@ -2781,10 +2772,14 @@ restart:
|
||||
ignore the payload and only compute the mini-transaction checksum;
|
||||
there will be a subsequent call with storing==YES. */
|
||||
continue;
|
||||
if (storing == NO)
|
||||
is_binlog= false;
|
||||
if (UNIV_UNLIKELY(rlen == 0 || last_offset == 1))
|
||||
goto record_corrupted;
|
||||
ut_d(const source payload{l});
|
||||
cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen);
|
||||
if (!is_binlog)
|
||||
{
|
||||
const uint32_t olen= mlog_decode_varint_length(*cl);
|
||||
if (UNIV_UNLIKELY(olen >= rlen) || UNIV_UNLIKELY(olen > 3))
|
||||
goto record_corrupted;
|
||||
@@ -2798,11 +2793,13 @@ restart:
|
||||
goto record_corrupted;
|
||||
cl+= olen;
|
||||
rlen-= olen;
|
||||
}
|
||||
if ((b & 0x70) == WRITE)
|
||||
{
|
||||
if (UNIV_UNLIKELY(rlen + last_offset > srv_page_size))
|
||||
if (is_binlog);
|
||||
else if (UNIV_UNLIKELY(rlen + last_offset > srv_page_size))
|
||||
goto record_corrupted;
|
||||
if (UNIV_UNLIKELY(!page_no) && file_checkpoint)
|
||||
else if (UNIV_UNLIKELY(!page_no) && file_checkpoint)
|
||||
{
|
||||
const bool has_size= last_offset <= FSP_HEADER_OFFSET + FSP_SIZE &&
|
||||
last_offset + rlen >= FSP_HEADER_OFFSET + FSP_SIZE + 4;
|
||||
@@ -2822,6 +2819,7 @@ restart:
|
||||
: file_name_t::initial_flags;
|
||||
if (it == recv_spaces.end())
|
||||
ut_ad(storing == NO || space_id == TRX_SYS_SPACE ||
|
||||
is_binlog ||
|
||||
srv_is_undo_tablespace(space_id));
|
||||
else if (!it->second.space)
|
||||
{
|
||||
@@ -2883,7 +2881,7 @@ restart:
|
||||
#endif
|
||||
if (storing == YES)
|
||||
{
|
||||
if (space_id >= LOG_BINLOG_ID_0 && space_id <= LOG_BINLOG_ID_1)
|
||||
if (is_binlog)
|
||||
{
|
||||
if ((b & 0xf0) != WRITE)
|
||||
goto record_corrupted;
|
||||
@@ -2894,10 +2892,12 @@ restart:
|
||||
ut_ad(offset != MLOG_DECODE_ERROR);
|
||||
if (UNIV_UNLIKELY(offset + rlen - olen >= 65535))
|
||||
goto record_corrupted;
|
||||
binlog_recover_write_data(space_id & 1, page_no, uint16_t(offset),
|
||||
const size_t head{l - recs + olen};
|
||||
if (binlog_recover_write_data(space_id & 1, page_no, uint16_t(offset),
|
||||
start_lsn, lsn,
|
||||
l.get_buf(cl, recs, decrypt_buf) + olen,
|
||||
l - recs + rlen - olen);
|
||||
l.get_buf(cl, recs, decrypt_buf) + head,
|
||||
rlen - olen))
|
||||
goto record_corrupted;
|
||||
continue;
|
||||
}
|
||||
if (if_exists)
|
||||
@@ -4256,6 +4256,9 @@ static bool recv_scan_log(bool last_phase)
|
||||
ut_ad(recv_sys.file_checkpoint);
|
||||
recv_sys.lsn= rewound_lsn;
|
||||
}
|
||||
else if (store)
|
||||
binlog_recover_end(recv_sys.lsn);
|
||||
|
||||
func_exit:
|
||||
ut_d(recv_sys.after_apply= last_phase);
|
||||
mysql_mutex_unlock(&recv_sys.mutex);
|
||||
|
Reference in New Issue
Block a user