mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MDEV-36180 Doublewrite recovery of innodb_checksum_algorithm=full_crc32 page_compressed pages does not work
- InnoDB fails to recover a full_crc32 page_compressed page from the doublewrite buffer. The reason is that buf_dblwr_t::recover() fails to identify the space id from the page, because the page is compressed starting from the FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION bytes. Fix: === recv_dblwr_t::find_deferred_page(): Find the page which has the same page number and try to decompress/decrypt it based on the tablespace metadata. After the decompression/decryption, compare the space id and write the recovered page back to the file. buf_page_t::read_complete(): If the page read from disk is corrupted, try to read the page from the deferred pages in the doublewrite buffer.
This commit is contained in:
committed by
Sergei Golubchik
parent
19c4e1abe4
commit
a390aaaf23
@@ -3,8 +3,9 @@ call mtr.add_suppression("InnoDB: Unable to apply log to corrupted page ");
|
|||||||
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
|
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
|
||||||
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
|
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
|
||||||
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
|
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
|
||||||
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
|
create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0;
|
||||||
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
|
create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0;
|
||||||
|
create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0;
|
||||||
start transaction;
|
start transaction;
|
||||||
insert into t1 values(1, repeat('#',12));
|
insert into t1 values(1, repeat('#',12));
|
||||||
insert into t1 values(2, repeat('+',12));
|
insert into t1 values(2, repeat('+',12));
|
||||||
@@ -12,29 +13,37 @@ insert into t1 values(3, repeat('/',12));
|
|||||||
insert into t1 values(4, repeat('-',12));
|
insert into t1 values(4, repeat('-',12));
|
||||||
insert into t1 values(5, repeat('.',12));
|
insert into t1 values(5, repeat('.',12));
|
||||||
insert into t2 select * from t1;
|
insert into t2 select * from t1;
|
||||||
|
insert into t3 select * from t1;
|
||||||
commit work;
|
commit work;
|
||||||
SET GLOBAL innodb_fast_shutdown = 0;
|
SET GLOBAL innodb_fast_shutdown = 0;
|
||||||
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
|
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
|
||||||
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
|
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
|
||||||
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
|
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
|
||||||
|
select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3';
|
||||||
begin;
|
begin;
|
||||||
insert into t1 values (6, repeat('%', 400));
|
insert into t1 values (6, repeat('%', 400));
|
||||||
insert into t2 values (6, repeat('%', 400));
|
insert into t2 values (6, repeat('%', 400));
|
||||||
|
insert into t3 values (6, repeat('%', 400));
|
||||||
# xtrabackup prepare
|
# xtrabackup prepare
|
||||||
set global innodb_saved_page_number_debug = 3;
|
set global innodb_saved_page_number_debug = 3;
|
||||||
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
|
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
|
||||||
set global innodb_saved_page_number_debug = 3;
|
set global innodb_saved_page_number_debug = 3;
|
||||||
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
|
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
|
||||||
|
set global innodb_saved_page_number_debug = 3;
|
||||||
|
set global innodb_fil_make_page_dirty_debug = @t3_space_id;
|
||||||
set global innodb_buf_flush_list_now = 1;
|
set global innodb_buf_flush_list_now = 1;
|
||||||
# Kill the server
|
# Kill the server
|
||||||
# restart
|
# restart
|
||||||
FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
|
FOUND 3 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
|
||||||
check table t1;
|
check table t1;
|
||||||
Table Op Msg_type Msg_text
|
Table Op Msg_type Msg_text
|
||||||
test.t1 check status OK
|
test.t1 check status OK
|
||||||
check table t2;
|
check table t2;
|
||||||
Table Op Msg_type Msg_text
|
Table Op Msg_type Msg_text
|
||||||
test.t2 check status OK
|
test.t2 check status OK
|
||||||
|
check table t3;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t3 check status OK
|
||||||
select f1, f2 from t1;
|
select f1, f2 from t1;
|
||||||
f1 f2
|
f1 f2
|
||||||
1 ############
|
1 ############
|
||||||
@@ -49,6 +58,13 @@ f1 f2
|
|||||||
3 ////////////
|
3 ////////////
|
||||||
4 ------------
|
4 ------------
|
||||||
5 ............
|
5 ............
|
||||||
|
select f1, f2 from t3;
|
||||||
|
f1 f2
|
||||||
|
1 ############
|
||||||
|
2 ++++++++++++
|
||||||
|
3 ////////////
|
||||||
|
4 ------------
|
||||||
|
5 ............
|
||||||
SET GLOBAL innodb_fast_shutdown = 0;
|
SET GLOBAL innodb_fast_shutdown = 0;
|
||||||
# shutdown server
|
# shutdown server
|
||||||
# remove datadir
|
# remove datadir
|
||||||
@@ -78,4 +94,4 @@ f1 f2
|
|||||||
3 ////////////
|
3 ////////////
|
||||||
4 ------------
|
4 ------------
|
||||||
5 ............
|
5 ............
|
||||||
drop table t2, t1;
|
drop table t3, t2, t1;
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
--innodb-use-atomic-writes=0
|
--innodb-use-atomic-writes=0
|
||||||
--innodb-encrypt-tables=FORCE
|
--innodb-encrypt-tables=on
|
||||||
--innodb_sys_tablespaces
|
--innodb_sys_tablespaces
|
||||||
|
@@ -12,8 +12,9 @@ let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
|
|||||||
let MYSQLD_DATADIR=`select @@datadir`;
|
let MYSQLD_DATADIR=`select @@datadir`;
|
||||||
let ALGO=`select @@innodb_checksum_algorithm`;
|
let ALGO=`select @@innodb_checksum_algorithm`;
|
||||||
|
|
||||||
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
|
create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0;
|
||||||
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
|
create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0;
|
||||||
|
create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0;
|
||||||
|
|
||||||
start transaction;
|
start transaction;
|
||||||
insert into t1 values(1, repeat('#',12));
|
insert into t1 values(1, repeat('#',12));
|
||||||
@@ -22,6 +23,7 @@ insert into t1 values(3, repeat('/',12));
|
|||||||
insert into t1 values(4, repeat('-',12));
|
insert into t1 values(4, repeat('-',12));
|
||||||
insert into t1 values(5, repeat('.',12));
|
insert into t1 values(5, repeat('.',12));
|
||||||
insert into t2 select * from t1;
|
insert into t2 select * from t1;
|
||||||
|
insert into t3 select * from t1;
|
||||||
commit work;
|
commit work;
|
||||||
|
|
||||||
# Slow shutdown and restart to make sure ibuf merge is finished
|
# Slow shutdown and restart to make sure ibuf merge is finished
|
||||||
@@ -33,12 +35,14 @@ let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_fl
|
|||||||
|
|
||||||
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
|
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
|
||||||
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
|
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
|
||||||
|
select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3';
|
||||||
|
|
||||||
begin;
|
begin;
|
||||||
insert into t1 values (6, repeat('%', 400));
|
insert into t1 values (6, repeat('%', 400));
|
||||||
insert into t2 values (6, repeat('%', 400));
|
insert into t2 values (6, repeat('%', 400));
|
||||||
|
insert into t3 values (6, repeat('%', 400));
|
||||||
|
|
||||||
# Copy the t1.ibd, t2.ibd file
|
# Copy the t1.ibd, t2.ibd, t3.ibd file
|
||||||
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1;
|
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1;
|
||||||
--disable_result_log
|
--disable_result_log
|
||||||
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir;
|
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir;
|
||||||
@@ -54,8 +58,11 @@ set global innodb_fil_make_page_dirty_debug = @t1_space_id;
|
|||||||
set global innodb_saved_page_number_debug = 3;
|
set global innodb_saved_page_number_debug = 3;
|
||||||
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
|
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
|
||||||
|
|
||||||
|
set global innodb_saved_page_number_debug = 3;
|
||||||
|
set global innodb_fil_make_page_dirty_debug = @t3_space_id;
|
||||||
|
|
||||||
set global innodb_buf_flush_list_now = 1;
|
set global innodb_buf_flush_list_now = 1;
|
||||||
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint;
|
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, t3, unexpected_checkpoint;
|
||||||
--source ../../suite/innodb/include/no_checkpoint_end.inc
|
--source ../../suite/innodb/include/no_checkpoint_end.inc
|
||||||
# Corrupt the page 3 in t1.ibd, t2.ibd file
|
# Corrupt the page 3 in t1.ibd, t2.ibd, t3.ibd file
|
||||||
perl;
|
perl;
|
||||||
@@ -103,6 +110,15 @@ binmode FILE;
|
|||||||
sysseek(FILE, 3*$page_size, 0);
|
sysseek(FILE, 3*$page_size, 0);
|
||||||
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
|
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
|
||||||
close FILE;
|
close FILE;
|
||||||
|
|
||||||
|
# Zero the complete page
|
||||||
|
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t3.ibd";
|
||||||
|
open(FILE, "+<", $fname) or die;
|
||||||
|
FILE->autoflush(1);
|
||||||
|
binmode FILE;
|
||||||
|
sysseek(FILE, 3*$page_size, 0);
|
||||||
|
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
|
||||||
|
close FILE;
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Successful recover from doublewrite buffer
|
# Successful recover from doublewrite buffer
|
||||||
@@ -114,8 +130,10 @@ let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9]*, page number=
|
|||||||
|
|
||||||
check table t1;
|
check table t1;
|
||||||
check table t2;
|
check table t2;
|
||||||
|
check table t3;
|
||||||
select f1, f2 from t1;
|
select f1, f2 from t1;
|
||||||
select f1, f2 from t2;
|
select f1, f2 from t2;
|
||||||
|
select f1, f2 from t3;
|
||||||
|
|
||||||
SET GLOBAL innodb_fast_shutdown = 0;
|
SET GLOBAL innodb_fast_shutdown = 0;
|
||||||
let $shutdown_timeout=;
|
let $shutdown_timeout=;
|
||||||
@@ -220,4 +238,4 @@ select * from t1;
|
|||||||
|
|
||||||
--source ../../mariabackup/include/restart_and_restore.inc
|
--source ../../mariabackup/include/restart_and_restore.inc
|
||||||
select * from t1;
|
select * from t1;
|
||||||
drop table t2, t1;
|
drop table t3, t2, t1;
|
||||||
|
@@ -3766,10 +3766,9 @@ database_corrupted_compressed:
|
|||||||
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
|
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
|
||||||
{
|
{
|
||||||
release_page:
|
release_page:
|
||||||
if (node.space->full_crc32() && node.space->crypt_data &&
|
if (node.space->full_crc32() && recv_recovery_is_on() &&
|
||||||
recv_recovery_is_on() &&
|
recv_sys.dblwr.find_deferred_page(node, id().page_no(),
|
||||||
recv_sys.dblwr.find_encrypted_page(node, id().page_no(),
|
const_cast<byte*>(read_frame)))
|
||||||
const_cast<byte*>(read_frame)))
|
|
||||||
{
|
{
|
||||||
/* Recover from doublewrite buffer */
|
/* Recover from doublewrite buffer */
|
||||||
err= DB_SUCCESS;
|
err= DB_SUCCESS;
|
||||||
|
@@ -377,7 +377,7 @@ void buf_dblwr_t::recover() noexcept
|
|||||||
srv_page_size));
|
srv_page_size));
|
||||||
byte *const buf= read_buf + srv_page_size;
|
byte *const buf= read_buf + srv_page_size;
|
||||||
|
|
||||||
std::deque<byte*> encrypted_pages;
|
std::deque<byte*> deferred_pages;
|
||||||
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
|
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
|
||||||
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
|
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
|
||||||
{
|
{
|
||||||
@@ -396,11 +396,12 @@ void buf_dblwr_t::recover() noexcept
|
|||||||
{
|
{
|
||||||
/* These pages does not appear to belong to any tablespace.
|
/* These pages does not appear to belong to any tablespace.
|
||||||
There is a possibility that this page could be
|
There is a possibility that this page could be
|
||||||
encrypted using full_crc32 format. If innodb encounters
|
encrypted/compressed using full_crc32 format.
|
||||||
any corrupted encrypted page during recovery then
|
If innodb encounters any corrupted encrypted/compressed
|
||||||
InnoDB should use this page to find the valid page.
|
page during recovery then InnoDB should use this page to
|
||||||
See find_encrypted_page() */
|
find the valid page.
|
||||||
encrypted_pages.push_back(*i);
|
See recv_dblwr_t::find_deferred_page() */
|
||||||
|
deferred_pages.push_back(*i);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -481,7 +482,7 @@ next_page:
|
|||||||
}
|
}
|
||||||
|
|
||||||
recv_sys.dblwr.pages.clear();
|
recv_sys.dblwr.pages.clear();
|
||||||
for (byte *page : encrypted_pages)
|
for (byte *page : deferred_pages)
|
||||||
recv_sys.dblwr.pages.push_back(page);
|
recv_sys.dblwr.pages.push_back(page);
|
||||||
fil_flush_file_spaces();
|
fil_flush_file_spaces();
|
||||||
aligned_free(read_buf);
|
aligned_free(read_buf);
|
||||||
|
@@ -150,15 +150,17 @@ struct recv_dblwr_t
|
|||||||
const fil_space_t *space= nullptr,
|
const fil_space_t *space= nullptr,
|
||||||
byte *tmp_buf= nullptr) const noexcept;
|
byte *tmp_buf= nullptr) const noexcept;
|
||||||
|
|
||||||
/** Find the doublewrite copy of an encrypted page with the
|
/** Find the doublewrite copy of an encrypted/page_compressed
|
||||||
smallest FIL_PAGE_LSN that is large enough for recovery.
|
page with the smallest FIL_PAGE_LSN that is large enough for
|
||||||
|
recovery.
|
||||||
@param space tablespace object
|
@param space tablespace object
|
||||||
@param page_no page number to find
|
@param page_no page number to find
|
||||||
@param buf buffer for unencrypted page
|
@param buf buffer for unencrypted/uncompressed page
|
||||||
@return buf
|
@return buf
|
||||||
@retval nullptr if the page was not found in doublewrite buffer */
|
@retval nullptr if the page was not found in doublewrite buffer */
|
||||||
byte *find_encrypted_page(const fil_node_t &space, uint32_t page_no,
|
ATTRIBUTE_COLD byte *find_deferred_page(const fil_node_t &space,
|
||||||
byte *buf) noexcept;
|
uint32_t page_no,
|
||||||
|
byte *buf) noexcept;
|
||||||
|
|
||||||
/** Restore the first page of the given tablespace from
|
/** Restore the first page of the given tablespace from
|
||||||
doublewrite buffer.
|
doublewrite buffer.
|
||||||
|
@@ -4842,28 +4842,43 @@ bool recv_dblwr_t::validate_page(const page_id_t page_id, lsn_t max_lsn,
|
|||||||
goto check_if_corrupted;
|
goto check_if_corrupted;
|
||||||
}
|
}
|
||||||
|
|
||||||
byte *recv_dblwr_t::find_encrypted_page(const fil_node_t &node,
|
ATTRIBUTE_COLD
|
||||||
uint32_t page_no,
|
byte *recv_dblwr_t::find_deferred_page(const fil_node_t &node,
|
||||||
byte *buf) noexcept
|
uint32_t page_no,
|
||||||
|
byte *buf) noexcept
|
||||||
{
|
{
|
||||||
ut_ad(node.space->crypt_data);
|
|
||||||
ut_ad(node.space->full_crc32());
|
ut_ad(node.space->full_crc32());
|
||||||
mysql_mutex_lock(&recv_sys.mutex);
|
mysql_mutex_lock(&recv_sys.mutex);
|
||||||
byte *result_page= nullptr;
|
byte *result_page= nullptr;
|
||||||
|
bool is_encrypted= node.space->crypt_data &&
|
||||||
|
node.space->crypt_data->is_encrypted();
|
||||||
for (list::iterator page_it= pages.begin(); page_it != pages.end();
|
for (list::iterator page_it= pages.begin(); page_it != pages.end();
|
||||||
page_it++)
|
page_it++)
|
||||||
{
|
{
|
||||||
if (page_get_page_no(*page_it) != page_no ||
|
if (page_get_page_no(*page_it) != page_no ||
|
||||||
buf_page_is_corrupted(true, *page_it, node.space->flags))
|
buf_page_is_corrupted(true, *page_it, node.space->flags))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (is_encrypted &&
|
||||||
|
!mach_read_from_4(*page_it + FIL_PAGE_FCRC32_KEY_VERSION))
|
||||||
|
continue;
|
||||||
|
|
||||||
memcpy(buf, *page_it, node.space->physical_size());
|
memcpy(buf, *page_it, node.space->physical_size());
|
||||||
buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false);
|
buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false);
|
||||||
ut_a(slot);
|
ut_a(slot);
|
||||||
slot->allocate();
|
slot->allocate();
|
||||||
bool invalidate=
|
|
||||||
!fil_space_decrypt(node.space, slot->crypt_buf, buf) ||
|
bool invalidate= false;
|
||||||
(node.space->is_compressed() &&
|
if (is_encrypted)
|
||||||
!fil_page_decompress(slot->crypt_buf, buf, node.space->flags));
|
{
|
||||||
|
invalidate= !fil_space_decrypt(node.space, slot->crypt_buf, buf);
|
||||||
|
if (!invalidate && node.space->is_compressed())
|
||||||
|
goto decompress;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
decompress:
|
||||||
|
invalidate= !fil_page_decompress(slot->crypt_buf, buf,
|
||||||
|
node.space->flags);
|
||||||
slot->release();
|
slot->release();
|
||||||
|
|
||||||
if (invalidate ||
|
if (invalidate ||
|
||||||
|
Reference in New Issue
Block a user