1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-36180 Doublewrite recovery of innodb_checksum_algorithm=full_crc32 page_compressed pages does not work

- InnoDB fails to recover a full_crc32 page_compressed page
from the doublewrite buffer. The reason is that buf_dblwr_t::recover()
fails to identify the space id from the page, because the page
is compressed starting from the FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION bytes.

Fix:
===
recv_dblwr_t::find_deferred_page(): Find the page which
has the same page number and try to decompress/decrypt the page
based on the tablespace metadata. After the decompression/decryption,
compare the space id and write the recovered page back to the file.

buf_page_t::read_complete(): If the page read from disk is corrupted,
try to recover it from the deferred pages in the doublewrite buffer.
This commit is contained in:
Thirunarayanan Balathandayuthapani
2025-03-26 11:15:09 +05:30
committed by Sergei Golubchik
parent 19c4e1abe4
commit a390aaaf23
7 changed files with 85 additions and 34 deletions

View File

@@ -3,8 +3,9 @@ call mtr.add_suppression("InnoDB: Unable to apply log to corrupted page ");
call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0; create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0; create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0;
create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0;
start transaction; start transaction;
insert into t1 values(1, repeat('#',12)); insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12)); insert into t1 values(2, repeat('+',12));
@@ -12,29 +13,37 @@ insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12)); insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12)); insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1; insert into t2 select * from t1;
insert into t3 select * from t1;
commit work; commit work;
SET GLOBAL innodb_fast_shutdown = 0; SET GLOBAL innodb_fast_shutdown = 0;
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0 # restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1'; select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2'; select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3';
begin; begin;
insert into t1 values (6, repeat('%', 400)); insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400)); insert into t2 values (6, repeat('%', 400));
insert into t3 values (6, repeat('%', 400));
# xtrabackup prepare # xtrabackup prepare
set global innodb_saved_page_number_debug = 3; set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id; set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3; set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id; set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t3_space_id;
set global innodb_buf_flush_list_now = 1; set global innodb_buf_flush_list_now = 1;
# Kill the server # Kill the server
# restart # restart
FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err FOUND 3 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
check table t1; check table t1;
Table Op Msg_type Msg_text Table Op Msg_type Msg_text
test.t1 check status OK test.t1 check status OK
check table t2; check table t2;
Table Op Msg_type Msg_text Table Op Msg_type Msg_text
test.t2 check status OK test.t2 check status OK
check table t3;
Table Op Msg_type Msg_text
test.t3 check status OK
select f1, f2 from t1; select f1, f2 from t1;
f1 f2 f1 f2
1 ############ 1 ############
@@ -49,6 +58,13 @@ f1 f2
3 //////////// 3 ////////////
4 ------------ 4 ------------
5 ............ 5 ............
select f1, f2 from t3;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
SET GLOBAL innodb_fast_shutdown = 0; SET GLOBAL innodb_fast_shutdown = 0;
# shutdown server # shutdown server
# remove datadir # remove datadir
@@ -78,4 +94,4 @@ f1 f2
3 //////////// 3 ////////////
4 ------------ 4 ------------
5 ............ 5 ............
drop table t2, t1; drop table t3, t2, t1;

View File

@@ -1,3 +1,3 @@
--innodb-use-atomic-writes=0 --innodb-use-atomic-writes=0
--innodb-encrypt-tables=FORCE --innodb-encrypt-tables=on
--innodb_sys_tablespaces --innodb_sys_tablespaces

View File

@@ -12,8 +12,9 @@ let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
let MYSQLD_DATADIR=`select @@datadir`; let MYSQLD_DATADIR=`select @@datadir`;
let ALGO=`select @@innodb_checksum_algorithm`; let ALGO=`select @@innodb_checksum_algorithm`;
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0; create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0; create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0;
create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0;
start transaction; start transaction;
insert into t1 values(1, repeat('#',12)); insert into t1 values(1, repeat('#',12));
@@ -22,6 +23,7 @@ insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12)); insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12)); insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1; insert into t2 select * from t1;
insert into t3 select * from t1;
commit work; commit work;
# Slow shutdown and restart to make sure ibuf merge is finished # Slow shutdown and restart to make sure ibuf merge is finished
@@ -33,12 +35,14 @@ let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_fl
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1'; select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2'; select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3';
begin; begin;
insert into t1 values (6, repeat('%', 400)); insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400)); insert into t2 values (6, repeat('%', 400));
insert into t3 values (6, repeat('%', 400));
# Copy the t1.ibd, t2.ibd file # Copy the t1.ibd, t2.ibd, t3.ibd file
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1; let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1;
--disable_result_log --disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir; exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir;
@@ -54,8 +58,11 @@ set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3; set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id; set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t3_space_id;
set global innodb_buf_flush_list_now = 1; set global innodb_buf_flush_list_now = 1;
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint; --let CLEANUP_IF_CHECKPOINT=drop table t1, t2, t3, unexpected_checkpoint;
--source ../../suite/innodb/include/no_checkpoint_end.inc --source ../../suite/innodb/include/no_checkpoint_end.inc
# Corrupt the page 3 in t1.ibd, t2.ibd file # Corrupt the page 3 in t1.ibd, t2.ibd file
perl; perl;
@@ -103,6 +110,15 @@ binmode FILE;
sysseek(FILE, 3*$page_size, 0); sysseek(FILE, 3*$page_size, 0);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'}); print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE; close FILE;
# Zero the complete page
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t3.ibd";
open(FILE, "+<", $fname) or die;
FILE->autoflush(1);
binmode FILE;
sysseek(FILE, 3*$page_size, 0);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;
EOF EOF
# Successful recover from doublewrite buffer # Successful recover from doublewrite buffer
@@ -114,8 +130,10 @@ let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9]*, page number=
check table t1; check table t1;
check table t2; check table t2;
check table t3;
select f1, f2 from t1; select f1, f2 from t1;
select f1, f2 from t2; select f1, f2 from t2;
select f1, f2 from t3;
SET GLOBAL innodb_fast_shutdown = 0; SET GLOBAL innodb_fast_shutdown = 0;
let $shutdown_timeout=; let $shutdown_timeout=;
@@ -220,4 +238,4 @@ select * from t1;
--source ../../mariabackup/include/restart_and_restore.inc --source ../../mariabackup/include/restart_and_restore.inc
select * from t1; select * from t1;
drop table t2, t1; drop table t3, t2, t1;

View File

@@ -3766,10 +3766,9 @@ database_corrupted_compressed:
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED) if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
{ {
release_page: release_page:
if (node.space->full_crc32() && node.space->crypt_data && if (node.space->full_crc32() && recv_recovery_is_on() &&
recv_recovery_is_on() && recv_sys.dblwr.find_deferred_page(node, id().page_no(),
recv_sys.dblwr.find_encrypted_page(node, id().page_no(), const_cast<byte*>(read_frame)))
const_cast<byte*>(read_frame)))
{ {
/* Recover from doublewrite buffer */ /* Recover from doublewrite buffer */
err= DB_SUCCESS; err= DB_SUCCESS;

View File

@@ -377,7 +377,7 @@ void buf_dblwr_t::recover() noexcept
srv_page_size)); srv_page_size));
byte *const buf= read_buf + srv_page_size; byte *const buf= read_buf + srv_page_size;
std::deque<byte*> encrypted_pages; std::deque<byte*> deferred_pages;
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin(); for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr) i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
{ {
@@ -396,11 +396,12 @@ void buf_dblwr_t::recover() noexcept
{ {
/* These pages does not appear to belong to any tablespace. /* These pages does not appear to belong to any tablespace.
There is a possibility that this page could be There is a possibility that this page could be
encrypted using full_crc32 format. If innodb encounters encrypted/compressed using full_crc32 format.
any corrupted encrypted page during recovery then If innodb encounters any corrupted encrypted/compressed
InnoDB should use this page to find the valid page. page during recovery then InnoDB should use this page to
See find_encrypted_page() */ find the valid page.
encrypted_pages.push_back(*i); See find_encrypted_page()/find_page_compressed() */
deferred_pages.push_back(*i);
continue; continue;
} }
@@ -481,7 +482,7 @@ next_page:
} }
recv_sys.dblwr.pages.clear(); recv_sys.dblwr.pages.clear();
for (byte *page : encrypted_pages) for (byte *page : deferred_pages)
recv_sys.dblwr.pages.push_back(page); recv_sys.dblwr.pages.push_back(page);
fil_flush_file_spaces(); fil_flush_file_spaces();
aligned_free(read_buf); aligned_free(read_buf);

View File

@@ -150,15 +150,17 @@ struct recv_dblwr_t
const fil_space_t *space= nullptr, const fil_space_t *space= nullptr,
byte *tmp_buf= nullptr) const noexcept; byte *tmp_buf= nullptr) const noexcept;
/** Find the doublewrite copy of an encrypted page with the /** Find the doublewrite copy of an encrypted/page_compressed
smallest FIL_PAGE_LSN that is large enough for recovery. page with the smallest FIL_PAGE_LSN that is large enough for
recovery.
@param space tablespace object @param space tablespace object
@param page_no page number to find @param page_no page number to find
@param buf buffer for unencrypted page @param buf buffer for unencrypted/uncompressed page
@return buf @return buf
@retval nullptr if the page was not found in doublewrite buffer */ @retval nullptr if the page was not found in doublewrite buffer */
byte *find_encrypted_page(const fil_node_t &space, uint32_t page_no, ATTRIBUTE_COLD byte *find_deferred_page(const fil_node_t &space,
byte *buf) noexcept; uint32_t page_no,
byte *buf) noexcept;
/** Restore the first page of the given tablespace from /** Restore the first page of the given tablespace from
doublewrite buffer. doublewrite buffer.

View File

@@ -4842,28 +4842,43 @@ bool recv_dblwr_t::validate_page(const page_id_t page_id, lsn_t max_lsn,
goto check_if_corrupted; goto check_if_corrupted;
} }
byte *recv_dblwr_t::find_encrypted_page(const fil_node_t &node, ATTRIBUTE_COLD
uint32_t page_no, byte *recv_dblwr_t::find_deferred_page(const fil_node_t &node,
byte *buf) noexcept uint32_t page_no,
byte *buf) noexcept
{ {
ut_ad(node.space->crypt_data);
ut_ad(node.space->full_crc32()); ut_ad(node.space->full_crc32());
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
byte *result_page= nullptr; byte *result_page= nullptr;
bool is_encrypted= node.space->crypt_data &&
node.space->crypt_data->is_encrypted();
for (list::iterator page_it= pages.begin(); page_it != pages.end(); for (list::iterator page_it= pages.begin(); page_it != pages.end();
page_it++) page_it++)
{ {
if (page_get_page_no(*page_it) != page_no || if (page_get_page_no(*page_it) != page_no ||
buf_page_is_corrupted(true, *page_it, node.space->flags)) buf_page_is_corrupted(true, *page_it, node.space->flags))
continue; continue;
if (is_encrypted &&
!mach_read_from_4(*page_it + FIL_PAGE_FCRC32_KEY_VERSION))
continue;
memcpy(buf, *page_it, node.space->physical_size()); memcpy(buf, *page_it, node.space->physical_size());
buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false); buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false);
ut_a(slot); ut_a(slot);
slot->allocate(); slot->allocate();
bool invalidate=
!fil_space_decrypt(node.space, slot->crypt_buf, buf) || bool invalidate= false;
(node.space->is_compressed() && if (is_encrypted)
!fil_page_decompress(slot->crypt_buf, buf, node.space->flags)); {
invalidate= !fil_space_decrypt(node.space, slot->crypt_buf, buf);
if (!invalidate && node.space->is_compressed())
goto decompress;
}
else
decompress:
invalidate= !fil_page_decompress(slot->crypt_buf, buf,
node.space->flags);
slot->release(); slot->release();
if (invalidate || if (invalidate ||