1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-36180 Doublewrite recovery of innodb_checksum_algorithm=full_crc32 page_compressed pages does not work

- InnoDB fails to recover a full_crc32 page_compressed page
from the doublewrite buffer. The reason is that buf_dblwr_t::recover()
fails to identify the space id from the page, because the page
is compressed starting from the FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION bytes.

Fix:
===
recv_dblwr_t::find_deferred_page(): Find the page which
has the same page number and try to decompress/decrypt the page
based on the tablespace metadata. After the decompression/decryption,
compare the space id and write the recovered page back to the file.

buf_page_t::read_complete(): If the page read from disk is corrupted,
try to read the page from the deferred pages in the doublewrite buffer.
This commit is contained in:
Thirunarayanan Balathandayuthapani
2025-03-26 11:15:09 +05:30
committed by Sergei Golubchik
parent 19c4e1abe4
commit a390aaaf23
7 changed files with 85 additions and 34 deletions

View File

@@ -3,8 +3,9 @@ call mtr.add_suppression("InnoDB: Unable to apply log to corrupted page ");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0;
create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0;
start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
@@ -12,29 +13,37 @@ insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
insert into t3 select * from t1;
commit work;
SET GLOBAL innodb_fast_shutdown = 0;
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3';
begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));
insert into t3 values (6, repeat('%', 400));
# xtrabackup prepare
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t3_space_id;
set global innodb_buf_flush_list_now = 1;
# Kill the server
# restart
FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
FOUND 3 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
check table t2;
Table Op Msg_type Msg_text
test.t2 check status OK
check table t3;
Table Op Msg_type Msg_text
test.t3 check status OK
select f1, f2 from t1;
f1 f2
1 ############
@@ -49,6 +58,13 @@ f1 f2
3 ////////////
4 ------------
5 ............
select f1, f2 from t3;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
SET GLOBAL innodb_fast_shutdown = 0;
# shutdown server
# remove datadir
@@ -78,4 +94,4 @@ f1 f2
3 ////////////
4 ------------
5 ............
drop table t2, t1;
drop table t3, t2, t1;

View File

@@ -1,3 +1,3 @@
--innodb-use-atomic-writes=0
--innodb-encrypt-tables=FORCE
--innodb-encrypt-tables=on
--innodb_sys_tablespaces

View File

@@ -12,8 +12,9 @@ let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
let MYSQLD_DATADIR=`select @@datadir`;
let ALGO=`select @@innodb_checksum_algorithm`;
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0;
create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0;
start transaction;
insert into t1 values(1, repeat('#',12));
@@ -22,6 +23,7 @@ insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
insert into t3 select * from t1;
commit work;
# Slow shutdown and restart to make sure ibuf merge is finished
@@ -33,12 +35,14 @@ let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_fl
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3';
begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));
insert into t3 values (6, repeat('%', 400));
# Copy the t1.ibd, t2.ibd file
# Copy the t1.ibd, t2.ibd, t3.ibd file
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1;
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir;
@@ -54,8 +58,11 @@ set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t3_space_id;
set global innodb_buf_flush_list_now = 1;
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint;
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, t3, unexpected_checkpoint;
--source ../../suite/innodb/include/no_checkpoint_end.inc
# Corrupt the page 3 in t1.ibd, t2.ibd file
perl;
@@ -103,6 +110,15 @@ binmode FILE;
sysseek(FILE, 3*$page_size, 0);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;
# Zero the complete page
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t3.ibd";
open(FILE, "+<", $fname) or die;
FILE->autoflush(1);
binmode FILE;
sysseek(FILE, 3*$page_size, 0);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;
EOF
# Successful recover from doublewrite buffer
@@ -114,8 +130,10 @@ let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9]*, page number=
check table t1;
check table t2;
check table t3;
select f1, f2 from t1;
select f1, f2 from t2;
select f1, f2 from t3;
SET GLOBAL innodb_fast_shutdown = 0;
let $shutdown_timeout=;
@@ -220,4 +238,4 @@ select * from t1;
--source ../../mariabackup/include/restart_and_restore.inc
select * from t1;
drop table t2, t1;
drop table t3, t2, t1;

View File

@@ -3766,9 +3766,8 @@ database_corrupted_compressed:
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
{
release_page:
if (node.space->full_crc32() && node.space->crypt_data &&
recv_recovery_is_on() &&
recv_sys.dblwr.find_encrypted_page(node, id().page_no(),
if (node.space->full_crc32() && recv_recovery_is_on() &&
recv_sys.dblwr.find_deferred_page(node, id().page_no(),
const_cast<byte*>(read_frame)))
{
/* Recover from doublewrite buffer */

View File

@@ -377,7 +377,7 @@ void buf_dblwr_t::recover() noexcept
srv_page_size));
byte *const buf= read_buf + srv_page_size;
std::deque<byte*> encrypted_pages;
std::deque<byte*> deferred_pages;
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
{
@@ -396,11 +396,12 @@ void buf_dblwr_t::recover() noexcept
{
/* These pages does not appear to belong to any tablespace.
There is a possibility that this page could be
encrypted using full_crc32 format. If innodb encounters
any corrupted encrypted page during recovery then
InnoDB should use this page to find the valid page.
See find_encrypted_page() */
encrypted_pages.push_back(*i);
encrypted/compressed using full_crc32 format.
If innodb encounters any corrupted encrypted/compressed
page during recovery then InnoDB should use this page to
find the valid page.
See find_encrypted_page()/find_page_compressed() */
deferred_pages.push_back(*i);
continue;
}
@@ -481,7 +482,7 @@ next_page:
}
recv_sys.dblwr.pages.clear();
for (byte *page : encrypted_pages)
for (byte *page : deferred_pages)
recv_sys.dblwr.pages.push_back(page);
fil_flush_file_spaces();
aligned_free(read_buf);

View File

@@ -150,14 +150,16 @@ struct recv_dblwr_t
const fil_space_t *space= nullptr,
byte *tmp_buf= nullptr) const noexcept;
/** Find the doublewrite copy of an encrypted page with the
smallest FIL_PAGE_LSN that is large enough for recovery.
/** Find the doublewrite copy of an encrypted/page_compressed
page with the smallest FIL_PAGE_LSN that is large enough for
recovery.
@param space tablespace object
@param page_no page number to find
@param buf buffer for unencrypted page
@param buf buffer for unencrypted/uncompressed page
@return buf
@retval nullptr if the page was not found in doublewrite buffer */
byte *find_encrypted_page(const fil_node_t &space, uint32_t page_no,
ATTRIBUTE_COLD byte *find_deferred_page(const fil_node_t &space,
uint32_t page_no,
byte *buf) noexcept;
/** Restore the first page of the given tablespace from

View File

@@ -4842,28 +4842,43 @@ bool recv_dblwr_t::validate_page(const page_id_t page_id, lsn_t max_lsn,
goto check_if_corrupted;
}
byte *recv_dblwr_t::find_encrypted_page(const fil_node_t &node,
ATTRIBUTE_COLD
byte *recv_dblwr_t::find_deferred_page(const fil_node_t &node,
uint32_t page_no,
byte *buf) noexcept
{
ut_ad(node.space->crypt_data);
ut_ad(node.space->full_crc32());
mysql_mutex_lock(&recv_sys.mutex);
byte *result_page= nullptr;
bool is_encrypted= node.space->crypt_data &&
node.space->crypt_data->is_encrypted();
for (list::iterator page_it= pages.begin(); page_it != pages.end();
page_it++)
{
if (page_get_page_no(*page_it) != page_no ||
buf_page_is_corrupted(true, *page_it, node.space->flags))
continue;
if (is_encrypted &&
!mach_read_from_4(*page_it + FIL_PAGE_FCRC32_KEY_VERSION))
continue;
memcpy(buf, *page_it, node.space->physical_size());
buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false);
ut_a(slot);
slot->allocate();
bool invalidate=
!fil_space_decrypt(node.space, slot->crypt_buf, buf) ||
(node.space->is_compressed() &&
!fil_page_decompress(slot->crypt_buf, buf, node.space->flags));
bool invalidate= false;
if (is_encrypted)
{
invalidate= !fil_space_decrypt(node.space, slot->crypt_buf, buf);
if (!invalidate && node.space->is_compressed())
goto decompress;
}
else
decompress:
invalidate= !fil_page_decompress(slot->crypt_buf, buf,
node.space->flags);
slot->release();
if (invalidate ||