From ea19a6b38cdab17b6b435f42c3bb722ee8fcc1e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 9 Jan 2025 13:18:30 +0200 Subject: [PATCH 1/3] MDEV-35699 Multi-batch recovery occasionally fails recv_sys_t::parse(): Do invoke fil_space_set_recv_size_and_flags() and do parse enough of page 0 to facilitate that. This fixes a regression that had been introduced in commit b249a059da2ef03eda35872a763e5453dbc39339 (MDEV-34850). In a multi-batch crash recovery, we would fail to invoke fil_space_set_recv_size_and_flags() while parsing the remaining log, before starting the first recovery batch. Reviewed by: Debarun Banerjee Tested by: Matthias Leich --- .../suite/encryption/r/recovery_memory.result | 15 +++++++ .../suite/encryption/t/recovery_memory.opt | 8 ++++ .../suite/encryption/t/recovery_memory.test | 45 +++++++++++++++++++ storage/innobase/log/log0recv.cc | 33 ++++++++------ 4 files changed, 88 insertions(+), 13 deletions(-) create mode 100644 mysql-test/suite/encryption/r/recovery_memory.result create mode 100644 mysql-test/suite/encryption/t/recovery_memory.opt create mode 100644 mysql-test/suite/encryption/t/recovery_memory.test diff --git a/mysql-test/suite/encryption/r/recovery_memory.result b/mysql-test/suite/encryption/r/recovery_memory.result new file mode 100644 index 00000000000..1b02fb6d31d --- /dev/null +++ b/mysql-test/suite/encryption/r/recovery_memory.result @@ -0,0 +1,15 @@ +CREATE TABLE t1(f1 text, index idx(f1(20))) ENGINE INNODB; +set global innodb_fast_shutdown=0; +# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0 +set global debug_dbug="+d,ib_log_checkpoint_avoid_hard"; +INSERT INTO t1 SELECT repeat('a', 8000) FROM seq_1_to_1280; +DELETE FROM t1; +SET GLOBAL innodb_max_purge_lag_wait=0; +INSERT INTO t1 VALUES('a'); +# XTRABACKUP PREPARE +# restart +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +ALTER TABLE t1 FORCE; +DROP TABLE t1; diff --git a/mysql-test/suite/encryption/t/recovery_memory.opt b/mysql-test/suite/encryption/t/recovery_memory.opt new file mode 100644 index 00000000000..125f90920df --- /dev/null +++ b/mysql-test/suite/encryption/t/recovery_memory.opt @@ -0,0 +1,8 @@ +--innodb_doublewrite=0 +--innodb_log_file_size=24m +--innodb_immediate_scrub_data_uncompressed=1 +--plugin-load-add=$FILE_KEY_MANAGEMENT_SO +--loose-file-key-management +--loose-file-key-management-filename=$MYSQL_TEST_DIR/std_data/logkey.txt +--file-key-management-encryption-algorithm=aes_cbc +--innodb-encrypt-log=1 diff --git a/mysql-test/suite/encryption/t/recovery_memory.test b/mysql-test/suite/encryption/t/recovery_memory.test new file mode 100644 index 00000000000..6187a29dba9 --- /dev/null +++ b/mysql-test/suite/encryption/t/recovery_memory.test @@ -0,0 +1,45 @@ +--source include/have_debug.inc +--source include/have_innodb.inc +--source include/have_sequence.inc +--source filekeys_plugin.inc + +let $basedir=$MYSQLTEST_VARDIR/tmp/backup; +let MYSQLD_DATADIR=`select @@datadir`; + +CREATE TABLE t1(f1 text, index idx(f1(20))) ENGINE INNODB; + +# No checkpoint happens during this restart + +let $shutdown_timeout=; +set global innodb_fast_shutdown=0; +let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0; +--source include/restart_mysqld.inc +set global debug_dbug="+d,ib_log_checkpoint_avoid_hard"; + +--disable_result_log +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir; +--enable_result_log + +INSERT INTO t1 SELECT repeat('a', 8000) FROM seq_1_to_1280; +DELETE FROM t1; +SET GLOBAL innodb_max_purge_lag_wait=0; +INSERT INTO t1 VALUES('a'); + +--echo # XTRABACKUP PREPARE +exec $XTRABACKUP --prepare --target-dir=$basedir; + +let $shutdown_timeout=0; +--source include/shutdown_mysqld.inc + +# Since there is no checkpoint during previous run, we can +# Copy the datafile from t1.ibd and start the server + +remove_file $MYSQLD_DATADIR/test/t1.ibd; +copy_file $basedir/test/t1.ibd $MYSQLD_DATADIR/test/t1.ibd; +--enable_result_log +let $restart_parameters=; +--source include/start_mysqld.inc + +SELECT COUNT(*) FROM t1; +ALTER TABLE t1 FORCE; +DROP TABLE t1; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index d0f68996001..1e692f92447 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2509,7 +2509,7 @@ restart: ut_ad(log_sys.is_latest()); alignas(8) byte iv[MY_AES_BLOCK_SIZE]; - byte *decrypt_buf= storing == YES + byte *decrypt_buf= storing != BACKUP ? static_cast(alloca(srv_page_size)) : nullptr; const lsn_t start_lsn{lsn}; @@ -2654,7 +2654,7 @@ restart: sql_print_warning("InnoDB: Ignoring malformed log record at LSN " LSN_PF, lsn); /* the next record must not be same_page */ - if (storing == YES) last_offset= 1; + if (storing != BACKUP) last_offset= 1; continue; } if (srv_operation == SRV_OPERATION_BACKUP) @@ -2664,7 +2664,7 @@ restart: lsn, b, l - recs + rlen, space_id, page_no)); goto same_page; } - if (storing == YES) last_offset= 0; + if (storing != BACKUP) last_offset= 0; idlen= mlog_decode_varint_length(*l); if (UNIV_UNLIKELY(idlen > 5 || idlen >= rlen)) { @@ -2695,7 +2695,7 @@ restart: goto page_id_corrupted; l+= idlen; rlen-= idlen; - if (storing == YES) + if (storing != BACKUP) { mach_write_to_4(iv + 8, space_id); mach_write_to_4(iv + 12, page_no); @@ -2745,15 +2745,15 @@ restart: ut_d(if ((b & 0x70) == INIT_PAGE || (b & 0x70) == OPTION) freed.erase(id)); ut_ad(freed.find(id) == freed.end()); - const byte *cl= storing == NO ? nullptr : l.ptr; + const byte *cl= nullptr; /* avoid bogus -Wmaybe-uninitialized */ switch (b & 0x70) { case FREE_PAGE: ut_ad(freed.emplace(id).second); /* the next record must not be same_page */ - if (storing == YES) last_offset= 1; + if (storing != BACKUP) last_offset= 1; goto free_or_init_page; case INIT_PAGE: - if (storing == YES) last_offset= FIL_PAGE_TYPE; + if (storing != BACKUP) last_offset= FIL_PAGE_TYPE; free_or_init_page: if (storing == BACKUP) continue; @@ -2808,13 +2808,14 @@ restart: trim({space_id, 0}, start_lsn); truncated_undo_spaces[space_id - srv_undo_space_id_start]= { start_lsn, page_no }; - if (storing == BACKUP && undo_space_trunc) + if (storing != BACKUP) + /* the next record must not be same_page */ + last_offset= 1; + else if (undo_space_trunc) undo_space_trunc(space_id); - /* the next record must not be same_page */ - if (storing == YES) last_offset= 1; continue; } - if (storing == YES) last_offset= FIL_PAGE_TYPE; + if (storing != BACKUP) last_offset= FIL_PAGE_TYPE; break; case OPTION: if (storing == YES && rlen == 5 && *l == OPT_PAGE_CHECKSUM) @@ -2825,7 +2826,13 @@ restart: case WRITE: case MEMMOVE: case MEMSET: - if (storing != YES) + if (storing == BACKUP) + continue; + if (storing == NO && UNIV_LIKELY(page_no != 0)) + /* fil_space_set_recv_size_and_flags() is mandatory for storing==NO. + It is only applicable to page_no == 0. Other than that, we can just + ignore the payload and only compute the mini-transaction checksum; + there will be a subsequent call with storing==YES. */ continue; if (UNIV_UNLIKELY(rlen == 0 || last_offset == 1)) goto record_corrupted; @@ -2867,7 +2874,7 @@ restart: last_offset) : file_name_t::initial_flags; if (it == recv_spaces.end()) - ut_ad(space_id == TRX_SYS_SPACE || + ut_ad(storing == NO || space_id == TRX_SYS_SPACE || srv_is_undo_tablespace(space_id)); else if (!it->second.space) { From ed13d93a256eed0c48ffda7b8fb0f6969e886574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 9 Jan 2025 13:18:42 +0200 Subject: [PATCH 2/3] Fix mariadb-backup --backup with innodb_undo_log_truncate=ON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes another regression that had been introduced in commit b249a059da2ef03eda35872a763e5453dbc39339 (MDEV-34850). This should prevent failures of mariadb-backup --backup of the following type: mariabackup: Failed to read undo log tablespace space id … and there is no undo tablespace truncation redo record. This error has not been hit by our internal testing, and we currently have no regression test to cover this. --- storage/innobase/log/log0recv.cc | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 1e692f92447..d4bdfbad391 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2791,10 +2791,28 @@ restart: cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); break; case EXTENDED: - if (storing != YES) + if (storing == NO) + /* We really only care about WRITE records to page 0, to + invoke fil_space_set_recv_size_and_flags(). As of now, the + EXTENDED records refer to index or undo log pages (which + page 0 never can be), or we have the TRIM_PAGES subtype for + shrinking a tablespace, to a larger number of pages than 0. + Either way, we can ignore this record during the preparation + for multi-batch recovery. */ continue; if (UNIV_UNLIKELY(!rlen)) goto record_corrupted; + if (storing == BACKUP) + { + if (rlen == 1 && undo_space_trunc) + { + cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); + if (*cl == TRIM_PAGES) + undo_space_trunc(space_id); + } + continue; + } + cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); if (rlen == 1 && *cl == TRIM_PAGES) { @@ -2815,7 +2833,10 @@ restart: undo_space_trunc(space_id); continue; } - if (storing != BACKUP) last_offset= FIL_PAGE_TYPE; + /* This record applies to an undo log or index page, and it + may be followed by subsequent WRITE or similar records for the + same page in the same mini-transaction. */ + last_offset= FIL_PAGE_TYPE; break; case OPTION: if (storing == YES && rlen == 5 && *l == OPT_PAGE_CHECKSUM) From 81633f47c3febe056cd4c1e704dbe9a70c7b6357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 9 Jan 2025 13:21:38 +0200 Subject: [PATCH 3/3] MDEV-35796 OPT_PAGE_CHECKSUM is ignored if innodb_encrypt_log=ON recv_sys_t::parse(): When parsing an OPTION record, invoke l.copy_if_needed() before checking if the payload is OPT_PAGE_CHECKSUM followed by a 32-bit page checksum. This fixes up the merge 57d4a242dac0238dd63ac05cd9d65a38d75a5d5f of commit 4179f93d28035ea2798cb1c16feeaaef87ab4775 (MDEV-18976). The impact of this can be observed by running a debug instrumented build on the test encryption.recovery_memory. There should be over 5,000 invocations of log_phys_t::page_checksum(). Without this fix, there should be less than 100 of them (when the OPT_PAGE_CHECKSUM byte happens to encrypt to itself). Reviewed by: Debarun Banerjee Tested by: Matthias Leich --- storage/innobase/log/log0recv.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index d4bdfbad391..1e5ea6f5a6d 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2787,7 +2787,6 @@ restart: erase(r); continue; } - copy_if_needed: cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); break; case EXTENDED: @@ -2839,8 +2838,14 @@ restart: last_offset= FIL_PAGE_TYPE; break; case OPTION: - if (storing == YES && rlen == 5 && *l == OPT_PAGE_CHECKSUM) - goto copy_if_needed; + /* OPTION records can be safely ignored in recovery */ + if (storing == YES && + rlen == 5/* OPT_PAGE_CHECKSUM and CRC-32C; see page_checksum() */) + { + cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); + if (*cl == OPT_PAGE_CHECKSUM) + break; + } /* fall through */ case RESERVED: continue;