diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index c32ba4cc1be..673ff3999cd 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -1582,9 +1582,8 @@ struct my_option xb_server_options[] = {"innodb_log_file_size", OPT_INNODB_LOG_FILE_SIZE, "Ignored for mysqld option compatibility", (G_PTR*) &srv_log_file_size, (G_PTR*) &srv_log_file_size, 0, - GET_ULL, REQUIRED_ARG, 48 << 20, 1 << 20, - std::numeric_limits<ulonglong>::max(), 0, - UNIV_PAGE_SIZE_MAX, 0}, + GET_ULL, REQUIRED_ARG, 96 << 20, 4 << 20, + std::numeric_limits<ulonglong>::max(), 0, 4096, 0}, {"innodb_log_group_home_dir", OPT_INNODB_LOG_GROUP_HOME_DIR, "Path to InnoDB log files.", &srv_log_group_home_dir, &srv_log_group_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -2540,10 +2539,10 @@ void xtrabackup_io_throttling() if (!xtrabackup_backup) return; - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); if (xtrabackup_throttle && (io_ticket--) < 0) - mysql_cond_wait(&wait_throttle, &log_sys.mutex); - mysql_mutex_unlock(&log_sys.mutex); + mysql_cond_wait(&wait_throttle, &recv_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); } static @@ -2972,7 +2971,7 @@ skip: @return whether the operation failed */ static bool xtrabackup_copy_logfile() { - mysql_mutex_assert_owner(&log_sys.mutex); + mysql_mutex_assert_owner(&recv_sys.mutex); DBUG_EXECUTE_IF("log_checksum_mismatch", return false;); ut_a(dst_log_file); @@ -2980,7 +2979,6 @@ static bool xtrabackup_copy_logfile() const size_t sequence_offset{log_sys.is_encrypted() ? 
8U + 5U : 5U}; const size_t block_size_1{log_sys.get_block_size() - 1}; - mysql_mutex_lock(&recv_sys.mutex); #ifdef HAVE_PMEM if (log_sys.is_pmem()) { @@ -3127,7 +3125,6 @@ static bool xtrabackup_copy_logfile() #ifdef HAVE_PMEM write_error: #endif - mysql_mutex_unlock(&recv_sys.mutex); msg("Error: write to ib_logfile0 failed"); return true; } @@ -3148,12 +3145,11 @@ static bool xtrabackup_copy_logfile() if (recv_sys.offset < log_sys.get_block_size()) break; - mysql_mutex_unlock(&recv_sys.mutex); - if (xtrabackup_throttle && io_ticket-- < 0) - mysql_cond_wait(&wait_throttle, &log_sys.mutex); + mysql_cond_wait(&wait_throttle, &recv_sys.mutex); retry_count= 0; + continue; } else { @@ -3173,7 +3169,6 @@ static bool xtrabackup_copy_logfile() mysql_mutex_lock(&recv_sys.mutex); } - mysql_mutex_unlock(&recv_sys.mutex); msg(">> log scanned up to (" LSN_PF ")", recv_sys.lsn); return false; } @@ -3204,16 +3199,16 @@ extern lsn_t server_lsn_after_lock; static void log_copying_thread() { my_thread_init(); - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); while (!xtrabackup_copy_logfile() && (!metadata_to_lsn || metadata_to_lsn > recv_sys.lsn)) { timespec abstime; set_timespec_nsec(abstime, 1000ULL * xtrabackup_log_copy_interval); - mysql_cond_timedwait(&log_copying_stop, &log_sys.mutex, &abstime); + mysql_cond_timedwait(&log_copying_stop, &recv_sys.mutex, &abstime); } log_copying_running= false; - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); my_thread_end(); } @@ -3226,13 +3221,13 @@ static void *io_watching_thread(void*) /* currently, for --backup only */ ut_a(xtrabackup_backup); - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); while (log_copying_running && !metadata_to_lsn) { timespec abstime; set_timespec(abstime, 1); - mysql_cond_timedwait(&log_copying_stop, &log_sys.mutex, &abstime); + mysql_cond_timedwait(&log_copying_stop, &recv_sys.mutex, &abstime); io_ticket= xtrabackup_throttle; 
mysql_cond_broadcast(&wait_throttle); } @@ -3240,7 +3235,7 @@ static void *io_watching_thread(void*) /* stop io throttle */ xtrabackup_throttle= 0; mysql_cond_broadcast(&wait_throttle); - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); return nullptr; } @@ -4512,7 +4507,7 @@ static void stop_backup_threads(bool running) @return whether the operation succeeded */ static bool xtrabackup_backup_low() { - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); ut_ad(!metadata_to_lsn); /* read the latest checkpoint lsn */ @@ -4531,19 +4526,19 @@ static bool xtrabackup_backup_low() recv_sys.lsn = lsn; mysql_cond_broadcast(&log_copying_stop); const bool running= log_copying_running; - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); stop_backup_threads(running); - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); } if (metadata_to_lsn && xtrabackup_copy_logfile()) { - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); ds_close(dst_log_file); dst_log_file = NULL; return false; } - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); if (ds_close(dst_log_file) || !metadata_to_lsn) { dst_log_file = NULL; @@ -4632,10 +4627,10 @@ static bool xtrabackup_backup_func() if(innodb_init_param()) { fail: if (log_copying_running) { - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); metadata_to_lsn = 1; mysql_cond_broadcast(&log_copying_stop); - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); stop_backup_threads(true); } @@ -4692,12 +4687,12 @@ fail: log_sys.create(); /* get current checkpoint_lsn */ - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); if (recv_sys.find_checkpoint() != DB_SUCCESS) { msg("Error: cannot read redo log header"); unlock_and_fail: - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); free_and_fail: 
aligned_free(const_cast<byte*>(field_ref_zero)); field_ref_zero = nullptr; @@ -4710,7 +4705,7 @@ free_and_fail: } recv_needed_recovery = true; - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); /* create extra LSN dir if it does not exist. */ if (xtrabackup_extra_lsndir @@ -4772,12 +4767,12 @@ free_and_fail: /* copy log file by current position */ - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); recv_sys.lsn = log_sys.next_checkpoint_lsn; const bool log_copy_failed = xtrabackup_copy_logfile(); - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); if (log_copy_failed) { log_copying_running = false; @@ -5212,7 +5207,7 @@ xb_delta_open_matching_space( return OS_FILE_CLOSED; } - mysql_mutex_lock(&log_sys.mutex); + mysql_mutex_lock(&recv_sys.mutex); if (!fil_is_user_tablespace_id(info.space_id)) { found: /* open the file and return its handle */ @@ -5225,7 +5220,7 @@ found: msg("mariabackup: Cannot open file %s\n", real_name); } exit: - mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_unlock(&recv_sys.mutex); return file; } diff --git a/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result b/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result index c1771fe534b..3c3e4831d8a 100644 --- a/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result +++ b/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result @@ -20,7 +20,7 @@ AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 1 /InnoDB: Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint/ in mysqld.1.err # empty redo log from before MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -28,7 +28,7 @@ COUNT(*) 1 FOUND 1 /InnoDB: Upgrading redo log:/ in mysqld.1.err # Corrupted multi-file redo log from before MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -36,7 +36,7 @@ COUNT(*) 0 FOUND 1 /InnoDB: Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and it appears corrupted/ in mysqld.1.err # Empty multi-file redo log (wrong offset) from before MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -45,7 +45,7 @@ COUNT(*) FOUND 3 /Upgrade after a crash is not supported. This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint\./ in mysqld.1.err # Multi-file redo log with size mismatch from after MariaDB 10.2.2 # Corrupted multi-file redo log from after MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -53,15 +53,16 @@ COUNT(*) 0 FOUND 3 /Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint\./ in mysqld.1.err FOUND 1 /InnoDB: No valid checkpoint was found; the log was created with BogoDB 1\.2\.3\.4\./ in mysqld.1.err -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); COUNT(*) 0 +FOUND 1 /InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 1048576 bytes!/ in mysqld.1.err FOUND 2 /InnoDB: No valid checkpoint was found; the log was created with BogoDB 1\.2\.3\.4\./ in mysqld.1.err # Empty multi-file redo log from after MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -74,6 +75,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 4194304 bytes!/ in mysqld.1.err FOUND 1 /InnoDB: Invalid log header checksum/ in mysqld.1.err # distant future redo log format, with valid header checksum # restart: 
--innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption @@ -165,7 +167,7 @@ AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 3 /\[ERROR\] InnoDB: Upgrade after a crash is not supported\. The redo log was created with MariaDB 10\.3\.1\./ in mysqld.1.err # Empty 10.3 redo log -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -173,7 +175,7 @@ COUNT(*) 1 FOUND 1 /InnoDB: log sequence number 1213964\b.*; transaction id 0/ in mysqld.1.err # Empty 10.2 redo log -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -181,7 +183,7 @@ COUNT(*) 1 FOUND 3 /InnoDB: Upgrading redo log:/ in mysqld.1.err # Empty 10.5 redo log -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m 
SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/log_corruption.result b/mysql-test/suite/innodb/r/log_corruption.result index 12e9d340bbd..df58a90dcec 100644 --- a/mysql-test/suite/innodb/r/log_corruption.result +++ b/mysql-test/suite/innodb/r/log_corruption.result @@ -20,7 +20,7 @@ AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 1 /InnoDB: Upgrade after a crash is not supported. This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint/ in mysqld.1.err # empty redo log from before MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -28,7 +28,7 @@ COUNT(*) 1 FOUND 1 /InnoDB: Upgrading redo log:/ in mysqld.1.err # Corrupted multi-file redo log from before MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -36,7 +36,7 @@ COUNT(*) 0 FOUND 1 /InnoDB: Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and it appears corrupted/ in mysqld.1.err # Empty multi-file redo log (wrong offset) from before MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -45,7 +45,7 @@ COUNT(*) FOUND 3 /Upgrade after a crash is not supported. This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint\./ in mysqld.1.err # Multi-file redo log with size mismatch from after MariaDB 10.2.2 # Corrupted multi-file redo log from after MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -53,15 +53,16 @@ COUNT(*) 0 FOUND 3 /Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint\./ in mysqld.1.err FOUND 1 /InnoDB: No valid checkpoint was found; the log was created with BogoDB 1\.2\.3\.4\./ in mysqld.1.err -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); COUNT(*) 0 +FOUND 1 /InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 1048576 bytes!/ in mysqld.1.err FOUND 2 /InnoDB: No valid checkpoint was found; the log was created with BogoDB 1\.2\.3\.4\./ in mysqld.1.err # Empty multi-file redo log from after MariaDB 10.2.2 -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -74,6 +75,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 4194304 bytes!/ in mysqld.1.err FOUND 1 /InnoDB: Invalid log header checksum/ in mysqld.1.err # distant future redo log format, with valid header checksum # restart: 
--innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption @@ -165,7 +167,7 @@ AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 3 /\[ERROR\] InnoDB: Upgrade after a crash is not supported\. The redo log was created with MariaDB 10\.3\.1\./ in mysqld.1.err # Empty 10.3 redo log -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -173,7 +175,7 @@ COUNT(*) 1 FOUND 1 /InnoDB: log sequence number 1213964\b.*; transaction id 0/ in mysqld.1.err # Empty 10.2 redo log -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -181,7 +183,7 @@ COUNT(*) 1 FOUND 3 /InnoDB: Upgrading redo log:/ in mysqld.1.err # Empty 10.5 redo log -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=2m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m 
SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/log_file_size.result b/mysql-test/suite/innodb/r/log_file_size.result index 47ca20af2f4..7b4935e1ab1 100644 --- a/mysql-test/suite/innodb/r/log_file_size.result +++ b/mysql-test/suite/innodb/r/log_file_size.result @@ -1,5 +1,5 @@ CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; -# restart: --innodb-log-file-size=2m +# restart: --innodb-log-file-size=4m CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK diff --git a/mysql-test/suite/innodb/t/log_corruption.test b/mysql-test/suite/innodb/t/log_corruption.test index dad5f67afca..7d351bf565c 100644 --- a/mysql-test/suite/innodb/t/log_corruption.test +++ b/mysql-test/suite/innodb/t/log_corruption.test @@ -16,7 +16,7 @@ call mtr.add_suppression("InnoDB: Log scan aborted at LSN"); call mtr.add_suppression("InnoDB: Missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT for tablespace 42\\r?$"); call mtr.add_suppression("InnoDB: Obtaining redo log encryption key version 1 failed"); call mtr.add_suppression("InnoDB: Decrypting checkpoint failed"); -call mtr.add_suppression("InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 1048576 bytes!"); +call mtr.add_suppression("InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files (1048576|4194304) bytes!"); --enable_query_log let bugdir= $MYSQLTEST_VARDIR/tmp/log_corruption; @@ -161,7 +161,7 @@ die unless seek(OUT, 0x800, 0); print OUT pack("NnnNx[496]N", 0x80000944, 12, 12, 0, 0xb2a); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' @@ -191,7 +191,7 @@ print OUT chr(0) x 2048; close OUT 
or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' @@ -210,7 +210,7 @@ print OUT chr(0) x 1536; close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' @@ -241,7 +241,7 @@ close OUT or die; EOF --echo # Corrupted multi-file redo log from after MariaDB 10.2.2 ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' @@ -259,13 +259,14 @@ print OUT chr(0); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); --source include/shutdown_mysqld.inc --let SEARCH_PATTERN=InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 1048576 bytes! +--source include/search_pattern_in_file.inc --let SEARCH_PATTERN=InnoDB: No valid checkpoint was found; the log was created with BogoDB 1\\.2\\.3\\.4\\. 
--source include/search_pattern_in_file.inc @@ -282,7 +283,7 @@ print OUT $_, pack("N", mycrc32($_, 0, $polynomial)); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' @@ -304,6 +305,8 @@ EOF --source include/start_mysqld.inc eval $check_no_innodb; --source include/shutdown_mysqld.inc +--let SEARCH_PATTERN=InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files 4194304 bytes! +--source include/search_pattern_in_file.inc let SEARCH_PATTERN=InnoDB: Invalid log header checksum; --source include/search_pattern_in_file.inc @@ -351,6 +354,9 @@ die unless seek(OUT, 0x210, 0); print OUT pack("NNx[264]", 0, 0x80c); print OUT pack("NNx[212]N", 0x590dbaac, 0xfe922582, 0xc72d49c4); close OUT or die; +die unless open OUT, ">", "$ENV{bugdir}/ib_logfile1"; +print OUT pack("x[4194304]"); +close OUT or die; EOF # Anything below innodb_force_recovery=6 must find a valid redo log. # Missing tablespace files are tolerated already with innodb_force_recovery=1. 
@@ -533,7 +539,7 @@ print OUT pack("NnnNx[496]N", 0x80000944, 12, 12, 1, 0x46c8a2a2); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=2m +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' diff --git a/mysql-test/suite/innodb/t/log_file_size.test b/mysql-test/suite/innodb/t/log_file_size.test index 22838475c60..261b024fa41 100644 --- a/mysql-test/suite/innodb/t/log_file_size.test +++ b/mysql-test/suite/innodb/t/log_file_size.test @@ -55,7 +55,7 @@ let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ---let $restart_parameters= --innodb-log-file-size=2m +--let $restart_parameters= --innodb-log-file-size=4m --source include/start_mysqld.inc let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; diff --git a/mysql-test/suite/mariabackup/innodb_redo_log_overwrite.opt b/mysql-test/suite/mariabackup/innodb_redo_log_overwrite.opt index 95b88d038ee..a0b4b588279 100644 --- a/mysql-test/suite/mariabackup/innodb_redo_log_overwrite.opt +++ b/mysql-test/suite/mariabackup/innodb_redo_log_overwrite.opt @@ -1 +1 @@ ---loose-innodb-log-file-size=2097152 +--loose-innodb-log-file-size=4194304 diff --git a/mysql-test/suite/mariabackup/innodb_redo_overwrite.opt b/mysql-test/suite/mariabackup/innodb_redo_overwrite.opt index 19c08c8c945..a0b4b588279 100644 --- a/mysql-test/suite/mariabackup/innodb_redo_overwrite.opt +++ b/mysql-test/suite/mariabackup/innodb_redo_overwrite.opt @@ -1 +1 @@ ---loose-innodb-log-file-size=2m +--loose-innodb-log-file-size=4194304 diff --git a/mysql-test/suite/mariabackup/innodb_redo_overwrite.result b/mysql-test/suite/mariabackup/innodb_redo_overwrite.result index abc0c57bcce..bab5d4331e0 100644 --- a/mysql-test/suite/mariabackup/innodb_redo_overwrite.result +++ 
b/mysql-test/suite/mariabackup/innodb_redo_overwrite.result @@ -1,5 +1,4 @@ -CREATE TABLE t(i INT) ENGINE=INNODB; -INSERT INTO t SELECT seq%10 FROM seq_0_to_51199; +CREATE TABLE t ENGINE=INNODB SELECT seq%10 i FROM seq_0_to_204796; # xtrabackup backup FOUND 1 /Was only able to copy log from \d+ to \d+, not \d+; try increasing innodb_log_file_size\b/ in backup.log NOT FOUND /failed: redo log block checksum does not match/ in backup.log diff --git a/mysql-test/suite/mariabackup/innodb_redo_overwrite.test b/mysql-test/suite/mariabackup/innodb_redo_overwrite.test index 7e0c1cef54f..34198222cea 100644 --- a/mysql-test/suite/mariabackup/innodb_redo_overwrite.test +++ b/mysql-test/suite/mariabackup/innodb_redo_overwrite.test @@ -3,8 +3,7 @@ --source include/have_debug.inc --source include/have_sequence.inc -CREATE TABLE t(i INT) ENGINE=INNODB; -INSERT INTO t SELECT seq%10 FROM seq_0_to_51199; +CREATE TABLE t ENGINE=INNODB SELECT seq%10 i FROM seq_0_to_204796; --echo # xtrabackup backup --let $targetdir=$MYSQLTEST_VARDIR/tmp/backup diff --git a/mysql-test/suite/perfschema/r/sxlock_func.result b/mysql-test/suite/perfschema/r/sxlock_func.result index 8f4c2fdcece..56a3a3f37b3 100644 --- a/mysql-test/suite/perfschema/r/sxlock_func.result +++ b/mysql-test/suite/perfschema/r/sxlock_func.result @@ -10,6 +10,7 @@ name wait/synch/rwlock/innodb/dict_operation_lock wait/synch/rwlock/innodb/fil_space_latch wait/synch/rwlock/innodb/lock_latch +wait/synch/rwlock/innodb/log_latch wait/synch/rwlock/innodb/trx_i_s_cache_lock wait/synch/rwlock/innodb/trx_purge_latch TRUNCATE TABLE performance_schema.events_waits_history_long; @@ -41,6 +42,7 @@ ORDER BY event_name; event_name wait/synch/rwlock/innodb/fil_space_latch wait/synch/rwlock/innodb/lock_latch +wait/synch/rwlock/innodb/log_latch SELECT event_name FROM performance_schema.events_waits_history_long WHERE event_name = 'wait/synch/sxlock/innodb/index_tree_rw_lock' AND operation IN ('try_shared_lock','shared_lock') LIMIT 1; diff --git 
a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 40702bca323..69986f31882 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -1011,7 +1011,7 @@ DEFAULT_VALUE 100663296 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT Redo log size in bytes. -NUMERIC_MIN_VALUE 1048576 +NUMERIC_MIN_VALUE 4194304 NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 4096 ENUM_VALUE_LIST NULL diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 0ca0ab56052..220997758e9 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -115,6 +115,7 @@ static void buf_flush_validate_skip() /** Wake up the page cleaner if needed */ void buf_pool_t::page_cleaner_wakeup() { + ut_d(buf_flush_validate_skip()); if (!page_cleaner_idle()) return; double dirty_pct= double(UT_LIST_GET_LEN(buf_pool.flush_list)) * 100.0 / @@ -155,7 +156,7 @@ void buf_pool_t::page_cleaner_wakeup() } } -inline void buf_pool_t::delete_from_flush_list_low(buf_page_t *bpage) +inline void buf_pool_t::delete_from_flush_list_low(buf_page_t *bpage) noexcept { ut_ad(!fsp_is_system_temporary(bpage->id().space())); mysql_mutex_assert_owner(&flush_list_mutex); @@ -166,13 +167,9 @@ inline void buf_pool_t::delete_from_flush_list_low(buf_page_t *bpage) /** Insert a modified block into the flush list. 
@param block modified block @param lsn start LSN of the mini-transaction that modified the block */ -void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn) +void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn) noexcept { - mysql_mutex_assert_not_owner(&mutex); -#ifdef SAFE_MUTEX - if (!recv_recovery_is_on()) - mysql_mutex_assert_owner(&log_sys.flush_order_mutex); -#endif /* SAFE_MUTEX */ + ut_ad(recv_recovery_is_on() || log_sys.latch.is_locked()); ut_ad(lsn > 2); static_assert(log_t::FIRST_LSN >= 2, "compatibility"); ut_ad(!fsp_is_system_temporary(block->page.id().space())); @@ -191,16 +188,27 @@ void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn) MEM_CHECK_DEFINED(block->page.zip.data ? block->page.zip.data : block->page.frame, block->physical_size()); - UT_LIST_ADD_FIRST(flush_list, &block->page); - ut_d(buf_flush_validate_skip()); - page_cleaner_wakeup(); + if (buf_page_t *prev= UT_LIST_GET_FIRST(flush_list)) + { + if (prev->oldest_modification() <= lsn) + goto insert_first; + while (buf_page_t *next= UT_LIST_GET_NEXT(list, prev)) + if (next->oldest_modification() <= lsn) + break; + else + prev= next; + UT_LIST_INSERT_AFTER(flush_list, prev, &block->page); + } + else + insert_first: + UT_LIST_ADD_FIRST(flush_list, &block->page); mysql_mutex_unlock(&flush_list_mutex); } /** Remove a block from flush_list. @param bpage buffer pool page @param clear whether to invoke buf_page_t::clear_oldest_modification() */ -void buf_pool_t::delete_from_flush_list(buf_page_t *bpage, bool clear) +void buf_pool_t::delete_from_flush_list(buf_page_t *bpage, bool clear) noexcept { delete_from_flush_list_low(bpage); stat.flush_list_bytes-= bpage->physical_size(); @@ -743,7 +751,7 @@ not_compressed: } /** Free a page whose underlying file page has been freed. 
*/ -inline void buf_pool_t::release_freed_page(buf_page_t *bpage) +inline void buf_pool_t::release_freed_page(buf_page_t *bpage) noexcept { mysql_mutex_assert_owner(&mutex); mysql_mutex_lock(&flush_list_mutex); @@ -1696,12 +1704,12 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept #endif { n_pending_checkpoint_writes++; - mysql_mutex_unlock(&mutex); + latch.wr_unlock(); /* FIXME: issue an asynchronous write */ log.write(offset, {c, get_block_size()}); if (srv_file_flush_method != SRV_O_DSYNC) ut_a(log.flush()); - mysql_mutex_lock(&mutex); + latch.wr_lock(SRW_LOCK_CALL); n_pending_checkpoint_writes--; } @@ -1712,7 +1720,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF ", flushed to " LSN_PF, next_checkpoint_lsn, get_flushed_lsn())); - mysql_mutex_unlock(&mutex); + latch.wr_unlock(); } /** Initiate a log checkpoint, discarding the start of the log. @@ -1722,7 +1730,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) { ut_ad(!srv_read_only_mode); - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); ut_ad(oldest_lsn <= end_lsn); ut_ad(end_lsn == log_sys.get_lsn()); ut_ad(!recv_no_log_write); @@ -1735,7 +1743,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) /* Do nothing, because nothing was logged (other than a FILE_CHECKPOINT record) since the previous checkpoint. */ do_nothing: - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); return true; } @@ -1748,13 +1756,14 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) It is important that we write out the redo log before any further dirty pages are flushed to the tablespace files. At this point, - because we hold log_sys.mutex, mtr_t::commit() in other threads will - be blocked, and no pages can be added to the flush lists. 
*/ + because we hold exclusive log_sys.latch, + mtr_t::commit() in other threads will be blocked, + and no pages can be added to buf_pool.flush_list. */ const lsn_t flush_lsn{fil_names_clear(oldest_lsn)}; ut_ad(flush_lsn >= end_lsn + SIZE_OF_FILE_CHECKPOINT); - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); log_write_up_to(flush_lsn, true); - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); if (log_sys.last_checkpoint_lsn >= oldest_lsn) goto do_nothing; @@ -1763,13 +1772,12 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) if (log_sys.n_pending_checkpoint_writes) { /* A checkpoint write is running */ - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); return false; } log_sys.next_checkpoint_lsn= oldest_lsn; log_sys.write_checkpoint(end_lsn); - mysql_mutex_assert_not_owner(&log_sys.mutex); return true; } @@ -1793,12 +1801,10 @@ static bool log_checkpoint() fil_flush_file_spaces(); } - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); const lsn_t end_lsn= log_sys.get_lsn(); - mysql_mutex_lock(&log_sys.flush_order_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex); const lsn_t oldest_lsn= buf_pool.get_oldest_modification(end_lsn); - mysql_mutex_unlock(&log_sys.flush_order_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex); return log_checkpoint_low(oldest_lsn, end_lsn); } @@ -1835,7 +1841,6 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) { ut_ad(sync_lsn); ut_ad(sync_lsn < LSN_MAX); - mysql_mutex_assert_not_owner(&log_sys.mutex); ut_ad(!srv_read_only_mode); if (recv_recovery_is_on()) @@ -1893,7 +1898,6 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) @param furious true=furious flushing, false=limit to innodb_io_capacity */ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) { - mysql_mutex_assert_not_owner(&log_sys.mutex); ut_ad(!srv_read_only_mode); if (recv_recovery_is_on()) @@ -1952,11 +1956,9 @@ ATTRIBUTE_COLD static void 
buf_flush_sync_for_checkpoint(lsn_t lsn) fil_flush_file_spaces(); } - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); const lsn_t newest_lsn= log_sys.get_lsn(); - mysql_mutex_lock(&log_sys.flush_order_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex); - mysql_mutex_unlock(&log_sys.flush_order_mutex); lsn_t measure= buf_pool.get_oldest_modification(0); const lsn_t checkpoint_lsn= measure ? measure : newest_lsn; @@ -1970,13 +1972,11 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn) } else { - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); if (!measure) measure= LSN_MAX; } - mysql_mutex_assert_not_owner(&log_sys.mutex); - /* After attempting log checkpoint, check if we have reached our target. */ const lsn_t target= buf_flush_sync_lsn; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index ce6acde05f6..94bac460333 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -881,18 +881,21 @@ bool fil_space_free(uint32_t id, bool x_latched) } if (!recv_recovery_is_on()) { - mysql_mutex_lock(&log_sys.mutex); - } + log_sys.latch.wr_lock(SRW_LOCK_CALL); - mysql_mutex_assert_owner(&log_sys.mutex); + if (space->max_lsn) { + ut_d(space->max_lsn = 0); + fil_system.named_spaces.remove(*space); + } - if (space->max_lsn != 0) { - ut_d(space->max_lsn = 0); - fil_system.named_spaces.remove(*space); - } + log_sys.latch.wr_unlock(); + } else { + ut_ad(log_sys.latch.is_write_locked()); - if (!recv_recovery_is_on()) { - mysql_mutex_unlock(&log_sys.mutex); + if (space->max_lsn) { + ut_d(space->max_lsn = 0); + fil_system.named_spaces.remove(*space); + } } fil_space_free_low(space); @@ -1474,9 +1477,9 @@ static void fil_name_write_rename_low(uint32_t space_id, const char *old_name, static void fil_name_commit_durable(mtr_t *mtr) { - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); auto lsn= mtr->commit_files(); - 
mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); log_write_up_to(lsn, true); } @@ -1647,13 +1650,13 @@ pfs_os_file_t fil_delete_tablespace(uint32_t id) handle= fil_system.detach(space, true); mysql_mutex_unlock(&fil_system.mutex); - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); if (space->max_lsn) { ut_d(space->max_lsn = 0); fil_system.named_spaces.remove(*space); } - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); fil_space_free_low(space); } @@ -1853,11 +1856,12 @@ static bool fil_rename_tablespace(uint32_t id, const char *old_path, ut_ad(strchr(new_file_name, '/')); if (!recv_recovery_is_on()) { - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); } - /* log_sys.mutex is above fil_system.mutex in the latching order */ - mysql_mutex_assert_owner(&log_sys.mutex); + /* log_sys.latch is above fil_system.mutex in the latching order */ + ut_ad(log_sys.latch.is_write_locked() || + srv_operation == SRV_OPERATION_RESTORE_DELTA); mysql_mutex_lock(&fil_system.mutex); space->release(); ut_ad(node->name == old_file_name); @@ -1880,7 +1884,7 @@ skip_second_rename: } if (!recv_recovery_is_on()) { - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); } mysql_mutex_unlock(&fil_system.mutex); @@ -3031,7 +3035,7 @@ void fil_names_dirty( fil_space_t* space) { - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); ut_ad(recv_recovery_is_on()); ut_ad(log_sys.get_lsn() != 0); ut_ad(space->max_lsn == 0); @@ -3045,7 +3049,7 @@ fil_names_dirty( tablespace was modified for the first time since fil_names_clear(). 
*/ ATTRIBUTE_NOINLINE ATTRIBUTE_COLD void mtr_t::name_write() { - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); ut_d(fil_space_validate_for_mtr_commit(m_user_space)); ut_ad(!m_user_space->max_lsn); m_user_space->max_lsn= log_sys.get_lsn(); @@ -3073,7 +3077,7 @@ lsn_t fil_names_clear(lsn_t lsn) { mtr_t mtr; - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); ut_ad(lsn); ut_ad(log_sys.is_latest()); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4f3c3041455..a8ed2dd6bfb 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -242,10 +242,10 @@ static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *, if (thd_kill_level(thd)) break; /* Adjust for purge_coordinator_state::refresh() */ - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); const lsn_t last= log_sys.last_checkpoint_lsn, max_age= log_sys.max_checkpoint_age; - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); const lsn_t lsn= log_sys.get_lsn(); if ((lsn - last) / 4 >= max_age / 5) buf_flush_ahead(last + max_age / 5, false); @@ -534,8 +534,6 @@ mysql_pfs_key_t fts_pll_tokenize_mutex_key; mysql_pfs_key_t ibuf_bitmap_mutex_key; mysql_pfs_key_t ibuf_mutex_key; mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; -mysql_pfs_key_t log_sys_mutex_key; -mysql_pfs_key_t log_flush_order_mutex_key; mysql_pfs_key_t recalc_pool_mutex_key; mysql_pfs_key_t purge_sys_pq_mutex_key; mysql_pfs_key_t recv_sys_mutex_key; @@ -571,12 +569,10 @@ static PSI_mutex_info all_innodb_mutexes[] = { PSI_KEY(fts_cache_init_mutex), PSI_KEY(fts_delete_mutex), PSI_KEY(fts_doc_id_mutex), - PSI_KEY(log_flush_order_mutex), PSI_KEY(ibuf_bitmap_mutex), PSI_KEY(ibuf_mutex), PSI_KEY(ibuf_pessimistic_insert_mutex), PSI_KEY(index_online_log), - PSI_KEY(log_sys_mutex), PSI_KEY(page_zip_stat_per_index_mutex), PSI_KEY(purge_sys_pq_mutex), 
PSI_KEY(recv_sys_mutex), @@ -603,6 +599,7 @@ mysql_pfs_key_t fil_space_latch_key; mysql_pfs_key_t trx_i_s_cache_lock_key; mysql_pfs_key_t trx_purge_latch_key; mysql_pfs_key_t lock_latch_key; +mysql_pfs_key_t log_latch_key; /* all_innodb_rwlocks array contains rwlocks that are performance schema instrumented if "UNIV_PFS_RWLOCK" @@ -617,6 +614,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = { &trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0 }, { &trx_purge_latch_key, "trx_purge_latch", 0 }, { &lock_latch_key, "lock_latch", 0 }, + { &log_latch_key, "log_latch", 0 }, { &index_tree_rw_lock_key, "index_tree_rw_lock", PSI_RWLOCK_FLAG_SX } }; # endif /* UNIV_PFS_RWLOCK */ @@ -949,8 +947,7 @@ static SHOW_VAR innodb_status_variables[]= { {"buffer_pool_reads", &export_vars.innodb_buffer_pool_reads, SHOW_SIZE_T}, {"buffer_pool_wait_free", &buf_pool.stat.LRU_waits, SHOW_SIZE_T}, - {"buffer_pool_write_requests", - &export_vars.innodb_buffer_pool_write_requests, SHOW_SIZE_T}, + {"buffer_pool_write_requests", &buf_pool.flush_list_requests, SHOW_SIZE_T}, {"checkpoint_age", &export_vars.innodb_checkpoint_age, SHOW_SIZE_T}, {"checkpoint_max_age", &export_vars.innodb_checkpoint_max_age, SHOW_SIZE_T}, {"data_fsyncs", (size_t*) &os_n_fsyncs, SHOW_SIZE_T}, @@ -19235,7 +19232,7 @@ static MYSQL_SYSVAR_SIZE_T(log_buffer_size, log_sys.buf_size, static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Redo log size in bytes.", - NULL, NULL, 96 << 20, 1U << 20, std::numeric_limits::max(), 4096); + NULL, NULL, 96 << 20, 4 << 20, std::numeric_limits::max(), 4096); static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct, PLUGIN_VAR_RQCMDARG, diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 703087d1b7f..88d1989bc73 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1,7 +1,7 @@ 
/***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1755,6 +1755,9 @@ public: FlushHp flush_hp; /** modified blocks (a subset of LRU) */ UT_LIST_BASE_NODE_T(buf_page_t) flush_list; + /** number of blocks ever added to flush_list; + protected by flush_list_mutex */ + size_t flush_list_requests; private: /** whether the page cleaner needs wakeup from indefinite sleep */ bool page_cleaner_is_idle; @@ -1765,7 +1768,7 @@ public: pthread_cond_t do_flush_list; /** @return whether the page cleaner must sleep due to being idle */ - bool page_cleaner_idle() const + bool page_cleaner_idle() const noexcept { mysql_mutex_assert_owner(&flush_list_mutex); return page_cleaner_is_idle; @@ -1885,24 +1888,24 @@ public: private: /** Remove a block from the flush list. */ - inline void delete_from_flush_list_low(buf_page_t *bpage); + inline void delete_from_flush_list_low(buf_page_t *bpage) noexcept; /** Remove a block from flush_list. @param bpage buffer pool page @param clear whether to invoke buf_page_t::clear_oldest_modification() */ - void delete_from_flush_list(buf_page_t *bpage, bool clear); + void delete_from_flush_list(buf_page_t *bpage, bool clear) noexcept; public: /** Remove a block from flush_list. @param bpage buffer pool page */ - void delete_from_flush_list(buf_page_t *bpage) + void delete_from_flush_list(buf_page_t *bpage) noexcept { delete_from_flush_list(bpage, true); } /** Insert a modified block into the flush list. 
@param block modified block @param lsn start LSN of the mini-transaction that modified the block */ - void insert_into_flush_list(buf_block_t *block, lsn_t lsn); + void insert_into_flush_list(buf_block_t *block, lsn_t lsn) noexcept; /** Free a page whose underlying file page has been freed. */ - inline void release_freed_page(buf_page_t *bpage); + inline void release_freed_page(buf_page_t *bpage) noexcept; private: /** Temporary memory for page_compressed and encrypted I/O */ diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 665fd1115e7..af38f61b13b 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -103,33 +103,6 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn); @param furious true=furious flushing, false=limit to innodb_io_capacity */ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious); -/********************************************************************//** -This function should be called at a mini-transaction commit, if a page was -modified in it. Puts the block to the list of modified blocks, if it not -already in it. 
*/ -inline void buf_flush_note_modification(buf_block_t *b, lsn_t start, lsn_t end) -{ - ut_ad(!srv_read_only_mode); - ut_d(const auto s= b->page.state()); - ut_ad(s > buf_page_t::FREED); - ut_ad(s < buf_page_t::READ_FIX); - ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <= end); - mach_write_to_8(b->page.frame + FIL_PAGE_LSN, end); - if (UNIV_LIKELY_NULL(b->page.zip.data)) - memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data, - FIL_PAGE_LSN + b->page.frame, 8); - - const lsn_t oldest_modification= b->page.oldest_modification(); - - if (oldest_modification > 1) - ut_ad(oldest_modification <= start); - else if (fsp_is_system_temporary(b->page.id().space())) - b->page.set_temp_modified(); - else - buf_pool.insert_into_flush_list(b, start); - srv_stats.buf_pool_write_requests.inc(); -} - /** Initialize page_cleaner. */ ATTRIBUTE_COLD void buf_flush_page_cleaner_init(); diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 81005d43c8b..a05485696f6 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -354,13 +354,10 @@ struct fil_space_t final /** fil_system.spaces chain node */ fil_space_t *hash; - lsn_t max_lsn; - /*!< LSN of the most recent - fil_names_write_if_was_clean(). - Reset to 0 by fil_names_clear(). - Protected by log_sys.mutex. - If and only if this is nonzero, the - tablespace will be in named_spaces. */ + /** LSN of the most recent fil_names_write_if_was_clean(). + Reset to 0 by fil_names_clear(). Protected by exclusive log_sys.latch. + If and only if max_lsn is nonzero, this is in fil_system.named_spaces. 
*/ + lsn_t max_lsn; /** tablespace identifier */ uint32_t id; /** whether undo tablespace truncation is in progress */ @@ -1043,7 +1040,7 @@ struct fil_node_t final { /** tablespace containing this file */ fil_space_t *space; - /** file name; protected by fil_system.mutex and log_sys.mutex */ + /** file name; protected by fil_system.mutex and exclusive log_sys.latch */ char *name; /** file handle */ pfs_os_file_t handle; @@ -1434,14 +1431,12 @@ public: /** nonzero if fil_node_open_file_low() should avoid moving the tablespace to the end of space_list, for FIFO policy of try_to_close() */ ulint freeze_space_list; + /** list of all tablespaces */ ilist space_list; - /*!< list of all file spaces */ + /** list of all tablespaces for which a FILE_MODIFY record has been written + since the latest redo log checkpoint. + Protected only by exclusive log_sys.latch. */ ilist named_spaces; - /*!< list of all file spaces - for which a FILE_MODIFY - record has been written since - the latest redo log checkpoint. - Protected only by log_sys.mutex. */ /** list of all ENCRYPTED=DEFAULT tablespaces that need to be converted to the current value of innodb_encrypt_tables */ diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 618bda3b87a..220d1680513 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -37,6 +37,7 @@ Created 12/9/1995 Heikki Tuuri #include "os0file.h" #include "span.h" #include "my_atomic_wrapper.h" +#include "srw_lock.h" #include using st_::span; @@ -57,16 +58,6 @@ static inline void delete_log_file(const char* suffix) os_file_delete_if_exists(innodb_log_file_key, path.c_str(), nullptr); } -/** Calculate the recommended highest values for lsn - last_checkpoint_lsn -and lsn - buf_pool.get_oldest_modification(). 
-@param[in] file_size requested innodb_log_file_size -@retval true on success -@retval false if the smallest log is too small to -accommodate the number of OS threads in the database server */ -bool -log_set_capacity(ulonglong file_size) - MY_ATTRIBUTE((warn_unused_result)); - struct completion_callback; /** Ensure that the log has been written to the log file up to a given @@ -83,10 +74,10 @@ void log_write_up_to(lsn_t lsn, bool durable, void log_buffer_flush_to_disk(bool durable= true); -/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */ +/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.latch. */ ATTRIBUTE_COLD void log_write_and_flush_prepare(); -/** Durably write the log up to log_sys.lsn() and release log_sys.mutex. */ +/** Durably write the log up to log_sys.get_lsn(). */ ATTRIBUTE_COLD void log_write_and_flush(); /** Make a checkpoint */ @@ -202,35 +193,38 @@ private: preflush buffer pool pages, or initiate a log checkpoint. This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. 
*/ std::atomic check_flush_or_checkpoint_; + public: - /** mutex protecting the log */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex; + /** rw-lock protecting buf */ + MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) srw_lock latch; private: /** Last written LSN */ lsn_t write_lsn; public: - /** first free offset within the log buffer in use */ - size_t buf_free; - /** recommended maximum size of buf, after which the buffer is flushed */ - size_t max_buf_free; - /** mutex that ensures that inserts into buf_pool.flush_list are in - LSN order; allows mtr_t::commit() to release log_sys.mutex earlier */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_order_mutex; /** log record buffer, written to by mtr_t::commit() */ byte *buf; /** buffer for writing data to ib_logfile0, or nullptr if is_pmem() In write_buf(), buf and flush_buf are swapped */ byte *flush_buf; - /** number of write requests (to buf); protected by mutex */ - ulint write_to_buf; /** number of std::swap(buf, flush_buf) and writes from buf to log; - protected by mutex */ + protected by latch.wr_lock() */ ulint write_to_log; - /** number of waits in append_prepare() */ - ulint waits; /** innodb_log_buffer_size (size of buf and flush_buf, in bytes) */ size_t buf_size; +private: + /** spin lock protecting lsn, buf_free in append_prepare() */ + MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) sspin_lock lsn_lock; +public: + /** first free offset within buf use; protected by lsn_lock */ + Atomic_relaxed buf_free; + /** number of write requests (to buf); protected by exclusive lsn_lock */ + ulint write_to_buf; + /** number of waits in append_prepare(); protected by lsn_lock */ + ulint waits; + /** recommended maximum size of buf, after which the buffer is flushed */ + size_t max_buf_free; + /** log file size in bytes, including the header */ lsn_t file_size; private: @@ -272,11 +266,11 @@ public: /*!< this is the maximum allowed value for lsn - last_checkpoint_lsn when a new query step is started */ - 
/** latest completed checkpoint (protected by log_sys.mutex) */ + /** latest completed checkpoint (protected by latch.wr_lock()) */ Atomic_relaxed last_checkpoint_lsn; lsn_t next_checkpoint_lsn; /*!< next checkpoint lsn */ - /** next checkpoint number (protected by mutex) */ + /** next checkpoint number (protected by latch.wr_lock()) */ ulint next_checkpoint_no; /** number of pending checkpoint writes */ ulint n_pending_checkpoint_writes; @@ -299,6 +293,9 @@ public: void close_file(); + /** Calculate the checkpoint safety margins. */ + static void set_capacity(); + lsn_t get_lsn(std::memory_order order= std::memory_order_relaxed) const { return lsn.load(order); } void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_release); } @@ -310,17 +307,17 @@ public: /** Initialize the LSN on initial log file creation. */ lsn_t init_lsn() noexcept { - mysql_mutex_lock(&mutex); + latch.wr_lock(SRW_LOCK_CALL); const lsn_t lsn{get_lsn()}; flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); write_lsn= lsn; - mysql_mutex_unlock(&mutex); + latch.wr_unlock(); return lsn; } void set_recovered_lsn(lsn_t lsn) noexcept { - mysql_mutex_assert_owner(&mutex); + ut_ad(latch.is_write_locked()); write_lsn= lsn; this->lsn.store(lsn, std::memory_order_relaxed); flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); @@ -360,20 +357,29 @@ public: static size_t get_block_size() { return 512; } #endif +private: + /** Wait in append_prepare() for buffer to become available + @param ex whether log_sys.latch is exclusively locked */ + ATTRIBUTE_COLD static void append_prepare_wait(bool ex) noexcept; +public: /** Reserve space in the log buffer for appending data. 
- @param size upper limit of the length of the data to append(), in bytes - @return the current LSN */ - inline lsn_t append_prepare(size_t size) noexcept; + @tparam pmem log_sys.is_pmem() + @param size total length of the data to append(), in bytes + @param ex whether log_sys.latch is exclusively locked + @return the start LSN and the buffer position for append() */ + template + inline std::pair append_prepare(size_t size, bool ex) noexcept; /** Append a string of bytes to the redo log. + @param d destination @param s string of bytes @param size length of str, in bytes */ - void append(const void *s, size_t size) noexcept + void append(byte *&d, const void *s, size_t size) noexcept { - mysql_mutex_assert_owner(&mutex); - ut_ad(buf_free + size <= (is_pmem() ? file_size : buf_size)); - memcpy(buf + buf_free, s, size); - buf_free+= size; + ut_ad(latch.is_locked()); + ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size)); + memcpy(d, s, size); + d+= size; } /** Set the log file format. */ @@ -409,14 +415,15 @@ public: return START_OFFSET + (lsn - first_lsn) % capacity(); } - /** Write checkpoint information to the log header and release mutex. + /** Write checkpoint information and invoke latch.wr_unlock(). @param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */ inline void write_checkpoint(lsn_t end_lsn) noexcept; - /** Write buf to ib_logfile0 and release mutex. + /** Write buf to ib_logfile0. + @tparam release_latch whether to invoke latch.wr_unlock() @return new write target @retval 0 if everything was written */ - inline lsn_t write_buf() noexcept; + template inline lsn_t write_buf() noexcept; /** Create the log. 
*/ void create(lsn_t lsn) noexcept; diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index b352b0d5b1a..e2d2024b028 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -420,9 +420,9 @@ extern bool recv_no_ibuf_operations; /** TRUE when recv_init_crash_recovery() has been called. */ extern bool recv_needed_recovery; #ifdef UNIV_DEBUG -/** TRUE if writing to the redo log (mtr_commit) is forbidden. -Protected by log_sys.mutex. */ -extern bool recv_no_log_write; +/** whether writing to the redo log is forbidden; +protected by exclusive log_sys.latch. */ +extern bool recv_no_log_write; #endif /* UNIV_DEBUG */ /** TRUE if buf_page_is_corrupted() should check if the log sequence diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index ed2cc64ba7a..dce1b4e9378 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -107,7 +107,7 @@ struct mtr_t { /** Commit a mini-transaction that did not modify any pages, but generated some redo log on a higher level, such as FILE_MODIFY records and an optional FILE_CHECKPOINT marker. - The caller must hold log_sys.mutex. + The caller must hold exclusive log_sys.latch. This is to be used at log_checkpoint(). @param checkpoint_lsn the log sequence number of a checkpoint, or 0 @return current LSN */ @@ -632,13 +632,15 @@ private: ATTRIBUTE_NOINLINE void encrypt(); /** Append the redo log records to the redo log buffer. + @param ex whether log_sys.latch is already exclusively locked @return {start_lsn,flush_ahead} */ - std::pair do_write(); + std::pair do_write(bool ex); /** Append the redo log records to the redo log buffer. 
@param len number of bytes to write + @param ex whether log_sys.latch is exclusively locked @return {start_lsn,flush_ahead} */ - std::pair finish_write(size_t len); + std::pair finish_write(size_t len, bool ex); /** Release the resources */ inline void release_resources(); diff --git a/storage/innobase/include/mtr0mtr.inl b/storage/innobase/include/mtr0mtr.inl index 21ff912e9c7..b275f3ebcbe 100644 --- a/storage/innobase/include/mtr0mtr.inl +++ b/storage/innobase/include/mtr0mtr.inl @@ -49,9 +49,8 @@ mtr_t::memo_push(void* object, mtr_memo_type_t type) ut_ad(ut_is_2pow(type)); /* If this mtr has x-fixed a clean page then we set - the made_dirty flag. This tells us if we need to - grab log_sys.flush_order_mutex at mtr_t::commit() so that we - can insert the dirtied page into the flush list. */ + the made_dirty flag. This tells mtr_t::commit() + to hold log_sys.latch longer. */ if (!m_made_dirty && (type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX)) { diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 5fe293a3649..36f7a7095b3 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -86,9 +86,6 @@ struct srv_stats_t /** Count the amount of data written in total (in bytes) */ ulint_ctr_1_t data_written; - /** Store the number of write requests issued */ - ulint_ctr_1_t buf_pool_write_requests; - /** Number of buffer pool reads that led to the reading of a disk page */ ulint_ctr_1_t buf_pool_reads; @@ -684,7 +681,6 @@ struct export_var_t{ ulint innodb_buffer_pool_pages_old; ulint innodb_buffer_pool_read_requests; /*!< buf_pool.stat.n_page_gets */ ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */ - ulint innodb_buffer_pool_write_requests;/*!< srv_stats.buf_pool_write_requests */ ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ 
diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h index 1705c63b8ad..9fba021df25 100644 --- a/storage/innobase/include/srw_lock.h +++ b/storage/innobase/include/srw_lock.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2020, 2021, MariaDB Corporation. +Copyright (c) 2020, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -520,3 +520,12 @@ typedef srw_lock_impl srw_lock; typedef srw_lock_impl srw_spin_lock; #endif + +/** Simple spin lock */ +struct sspin_lock +{ + std::atomic word{0}; + void lock() noexcept; + void unlock() noexcept + { ut_ad(word); word.store(0, std::memory_order_release); } +}; diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index cb34f6c31f6..8dcd3c984de 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -517,8 +517,6 @@ extern mysql_pfs_key_t fts_pll_tokenize_mutex_key; extern mysql_pfs_key_t ibuf_bitmap_mutex_key; extern mysql_pfs_key_t ibuf_mutex_key; extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; -extern mysql_pfs_key_t log_sys_mutex_key; -extern mysql_pfs_key_t log_flush_order_mutex_key; extern mysql_pfs_key_t recalc_pool_mutex_key; extern mysql_pfs_key_t purge_sys_pq_mutex_key; extern mysql_pfs_key_t recv_sys_mutex_key; @@ -547,5 +545,6 @@ extern mysql_pfs_key_t index_tree_rw_lock_key; extern mysql_pfs_key_t index_online_log_key; extern mysql_pfs_key_t trx_sys_rw_lock_key; extern mysql_pfs_key_t lock_latch_key; +extern mysql_pfs_key_t log_latch_key; # endif /* UNIV_PFS_RWLOCK */ #endif /* HAVE_PSI_INTERFACE */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index bd146237000..739adcf1b97 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -74,52 +74,24 @@ log_t log_sys; 
#define LOG_BUF_FLUSH_MARGIN ((4 * 4096) /* cf. log_t::append_prepare() */ \ + (4U << srv_page_size_shift)) -/** Calculate the recommended highest values for lsn - last_checkpoint_lsn -and lsn - buf_pool.get_oldest_modification(). -@param[in] file_size requested innodb_log_file_size -@retval true on success -@retval false if the smallest log group is too small to -accommodate the number of OS threads in the database server */ -bool -log_set_capacity(ulonglong file_size) +void log_t::set_capacity() { - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); /* Margin for the free space in the smallest log, before a new query step which modifies the database, is started */ - const size_t LOG_CHECKPOINT_FREE_PER_THREAD = 4U - << srv_page_size_shift; - const size_t LOG_CHECKPOINT_EXTRA_FREE = 8U << srv_page_size_shift; - lsn_t margin; - ulint free; - - lsn_t smallest_capacity = file_size - log_t::START_OFFSET; + lsn_t smallest_capacity = srv_log_file_size - log_t::START_OFFSET; /* Add extra safety */ smallest_capacity -= smallest_capacity / 10; - /* For each OS thread we must reserve so much free space in the - smallest log group that it can accommodate the log entries produced - by single query steps: running out of free log space is a serious - system error which requires rebooting the database. */ - - free = LOG_CHECKPOINT_FREE_PER_THREAD * 10 - + LOG_CHECKPOINT_EXTRA_FREE; - if (free >= smallest_capacity / 2) { - sql_print_error("InnoDB: innodb_log_file_size is too small." 
- " %s", INNODB_PARAMETERS_MSG); - return false; - } - - margin = smallest_capacity - free; - margin = margin - margin / 10; /* Add still some extra safety */ + lsn_t margin = smallest_capacity - (48 << srv_page_size_shift); + margin -= margin / 10; /* Add still some extra safety */ log_sys.log_capacity = smallest_capacity; log_sys.max_modified_age_async = margin - margin / 8; log_sys.max_checkpoint_age = margin; - - return(true); } /** Initialize the redo log subsystem. */ @@ -128,14 +100,7 @@ void log_t::create() ut_ad(this == &log_sys); ut_ad(!is_initialised()); -#if defined(__aarch64__) - mysql_mutex_init(log_sys_mutex_key, &mutex, MY_MUTEX_INIT_FAST); - mysql_mutex_init( - log_flush_order_mutex_key, &flush_order_mutex, MY_MUTEX_INIT_FAST); -#else - mysql_mutex_init(log_sys_mutex_key, &mutex, nullptr); - mysql_mutex_init(log_flush_order_mutex_key, &flush_order_mutex, nullptr); -#endif + latch.SRW_LOCK_INIT(log_latch_key); /* LSN 0 and 1 are reserved; @see buf_page_t::oldest_modification_ */ lsn.store(FIRST_LSN, std::memory_order_relaxed); @@ -272,7 +237,7 @@ void log_t::attach(log_file_t file, os_offset_t size) void log_t::create(lsn_t lsn) noexcept { - mysql_mutex_assert_owner(&mutex); + ut_ad(latch.is_write_locked()); ut_ad(!recv_no_log_write); ut_ad(is_latest()); ut_ad(this == &log_sys); @@ -516,7 +481,6 @@ static size_t log_pad(lsn_t lsn, size_t pad, byte *begin, byte *extra) inline void log_t::persist(lsn_t lsn) noexcept { ut_ad(is_pmem()); - mysql_mutex_assert_not_owner(&mutex); ut_ad(!write_lock.is_owner()); ut_ad(!flush_lock.is_owner()); @@ -551,13 +515,13 @@ inline void log_t::persist(lsn_t lsn) noexcept } #endif -/** Write buf to ib_logfile0 and release mutex. +/** Write buf to ib_logfile0. 
+@tparam release_latch whether to invoke latch.wr_unlock() @return new write target @retval 0 if everything was written */ -inline lsn_t log_t::write_buf() noexcept +template inline lsn_t log_t::write_buf() noexcept { - mysql_mutex_assert_owner(&mutex); - + ut_ad(latch.is_write_locked()); ut_ad(!srv_read_only_mode); ut_ad(!is_pmem()); @@ -565,7 +529,8 @@ inline lsn_t log_t::write_buf() noexcept if (write_lsn >= lsn) { - mysql_mutex_unlock(&mutex); + if (release_latch) + latch.wr_unlock(); ut_ad(write_lsn == lsn); } else @@ -581,31 +546,33 @@ inline lsn_t log_t::write_buf() noexcept const byte *write_buf{buf}; size_t length{buf_free}; ut_ad(length >= (calc_lsn_offset(write_lsn) & block_size_1)); - buf_free&= block_size_1; - ut_ad(buf_free == ((lsn - first_lsn) & block_size_1)); + const size_t new_buf_free{length & block_size_1}; + buf_free= new_buf_free; + ut_ad(new_buf_free == ((lsn - first_lsn) & block_size_1)); - if (buf_free) + if (new_buf_free) { #if 0 /* TODO: Pad the last log block with dummy records. */ - buf_free= log_pad(lsn, get_block_size() - buf_free, - buf + buf_free, flush_buf); + buf_free= log_pad(lsn, get_block_size() - new_buf_free, + buf + new_buf_free, flush_buf); ... /* TODO: Update the LSN and adjust other code. */ #else /* The rest of the block will be written as garbage. (We want to avoid memset() while holding mutex.) This block will be overwritten later, once records beyond the current LSN are generated. 
*/ - MEM_MAKE_DEFINED(buf + length, get_block_size() - buf_free); + MEM_MAKE_DEFINED(buf + length, get_block_size() - new_buf_free); buf[length]= 0; /* allow recovery to catch EOF faster */ length&= ~block_size_1; - memcpy_aligned<16>(flush_buf, buf + length, (buf_free + 15) & ~15); + memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15); length+= get_block_size(); #endif } std::swap(buf, flush_buf); write_to_log++; - mysql_mutex_unlock(&mutex); + if (release_latch) + latch.wr_unlock(); if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) { @@ -690,8 +657,8 @@ repeat: if (write_lock.acquire(lsn, durable ? nullptr : callback) == group_commit_lock::ACQUIRED) { - mysql_mutex_lock(&log_sys.mutex); - write_lsn= log_sys.write_buf(); + log_sys.latch.wr_lock(SRW_LOCK_CALL); + write_lsn= log_sys.write_buf(); } else write_lsn= 0; @@ -718,11 +685,9 @@ void log_buffer_flush_to_disk(bool durable) log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), durable); } -/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */ +/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.latch. */ ATTRIBUTE_COLD void log_write_and_flush_prepare() { - mysql_mutex_assert_not_owner(&log_sys.mutex); - if (log_sys.is_pmem()) return; @@ -732,23 +697,18 @@ ATTRIBUTE_COLD void log_write_and_flush_prepare() group_commit_lock::ACQUIRED); } -/** Durably write the log and release log_sys.mutex */ +/** Durably write the log up to log_sys.get_lsn(). 
*/ ATTRIBUTE_COLD void log_write_and_flush() { ut_ad(!srv_read_only_mode); if (!log_sys.is_pmem()) { - const lsn_t write_lsn{log_sys.write_buf()}; - const lsn_t flush_lsn{log_flush(write_lock.value())}; - if (write_lsn || flush_lsn) - log_write_up_to(std::max(write_lsn, flush_lsn), true, &dummy_callback); + log_sys.write_buf(); + log_flush(write_lock.value()); } #ifdef HAVE_PMEM else - { - mysql_mutex_unlock(&log_sys.mutex); log_sys.persist(log_sys.get_lsn()); - } #endif } @@ -758,11 +718,7 @@ Tries to establish a big enough margin of free space in the log buffer, such that a new log entry can be catenated without an immediate need for a flush. */ ATTRIBUTE_COLD static void log_flush_margin() { - mysql_mutex_lock(&log_sys.mutex); - const bool flush{log_sys.buf_free > log_sys.max_buf_free}; - mysql_mutex_unlock(&log_sys.mutex); - - if (flush) + if (log_sys.buf_free > log_sys.max_buf_free) log_buffer_flush_to_disk(false); } @@ -775,26 +731,27 @@ ATTRIBUTE_COLD static void log_checkpoint_margin() { while (log_sys.check_flush_or_checkpoint()) { - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); ut_ad(!recv_no_log_write); if (!log_sys.check_flush_or_checkpoint()) { func_exit: - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); return; } const lsn_t lsn= log_sys.get_lsn(); const lsn_t checkpoint= log_sys.last_checkpoint_lsn; const lsn_t sync_lsn= checkpoint + log_sys.max_checkpoint_age; + if (lsn <= sync_lsn) { log_sys.set_check_flush_or_checkpoint(false); goto func_exit; } - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); /* We must wait to prevent the tail of the log overwriting the head. 
*/ buf_flush_wait_flushed(std::min(sync_lsn, checkpoint + (1U << 20))); @@ -944,9 +901,9 @@ wait_suspend_loop: } if (log_sys.is_initialised()) { - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); const ulint n_write = log_sys.n_pending_checkpoint_writes; - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); if (n_write) { if (srv_print_verbose_log && count > 600) { @@ -987,7 +944,7 @@ wait_suspend_loop: ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT; - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); lsn = log_sys.get_lsn(); @@ -995,7 +952,7 @@ wait_suspend_loop: && lsn != log_sys.last_checkpoint_lsn + sizeof_cp; ut_ad(lsn >= log_sys.last_checkpoint_lsn); - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); if (lsn_changed) { goto loop; @@ -1041,7 +998,7 @@ log_print( double time_elapsed; time_t current_time; - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); const lsn_t lsn= log_sys.get_lsn(); mysql_mutex_lock(&buf_pool.flush_list_mutex); @@ -1079,7 +1036,7 @@ log_print( log_sys.n_log_ios_old = log_sys.n_log_ios; log_sys.last_printout_time = current_time; - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); } /**********************************************************************//** @@ -1112,8 +1069,7 @@ void log_t::close() ut_ad(!flush_buf); #endif - mysql_mutex_destroy(&mutex); - mysql_mutex_destroy(&flush_order_mutex); + latch.destroy(); recv_sys.close(); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index fb667a3670c..5b2a006859d 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -62,7 +62,7 @@ recv_sys_t recv_sys; bool recv_needed_recovery; #ifdef UNIV_DEBUG /** TRUE if writing to the redo log (mtr_commit) is forbidden. -Protected by log_sys.mutex. */ +Protected by log_sys.latch. 
*/ bool recv_no_log_write = false; #endif /* UNIV_DEBUG */ @@ -2235,7 +2235,9 @@ template inline recv_sys_t::parse_mtr_result recv_sys_t::parse(store_t store, source &l) noexcept { - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked() || + srv_operation == SRV_OPERATION_BACKUP || + srv_operation == SRV_OPERATION_BACKUP_NO_DEFER); mysql_mutex_assert_owner(&mutex); ut_ad(log_sys.next_checkpoint_lsn); ut_ad(log_sys.is_latest()); @@ -2970,17 +2972,23 @@ set_start_lsn: if (start_lsn) { ut_ad(end_lsn >= start_lsn); + ut_ad(!block->page.oldest_modification()); mach_write_to_8(FIL_PAGE_LSN + frame, end_lsn); - if (UNIV_LIKELY(frame == block->page.frame)) { + if (UNIV_LIKELY(!block->page.zip.data)) { mach_write_to_8(srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + frame, end_lsn); } else { buf_zip_decompress(block, false); } - - buf_block_modify_clock_inc(block); - buf_flush_note_modification(block, start_lsn, end_lsn); + /* The following is adapted from + buf_pool_t::insert_into_flush_list() */ + mysql_mutex_lock(&buf_pool.flush_list_mutex); + buf_pool.stat.flush_list_bytes+= block->physical_size(); + block->page.set_oldest_modification(start_lsn); + UT_LIST_ADD_FIRST(buf_pool.flush_list, &block->page); + buf_pool.page_cleaner_wakeup(); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); } else if (free_page && init) { /* There have been no operations that modify the page. Any buffered changes must not be merged. 
A subsequent @@ -3271,9 +3279,6 @@ void recv_sys_t::apply(bool last_batch) srv_operation == SRV_OPERATION_RESTORE || srv_operation == SRV_OPERATION_RESTORE_EXPORT); -#ifdef SAFE_MUTEX - DBUG_ASSERT(!last_batch == mysql_mutex_is_owner(&log_sys.mutex)); -#endif /* SAFE_MUTEX */ mysql_mutex_assert_owner(&mutex); timespec abstime; @@ -3283,15 +3288,15 @@ void recv_sys_t::apply(bool last_batch) if (is_corrupt_log()) return; if (last_batch) - { - mysql_mutex_assert_not_owner(&log_sys.mutex); my_cond_wait(&cond, &mutex.m_mutex); - } else { - mysql_mutex_unlock(&mutex); + ut_ad(log_sys.latch.is_write_locked()); + log_sys.latch.wr_unlock(); set_timespec_nsec(abstime, 500000000ULL); /* 0.5s */ - my_cond_timedwait(&cond, &log_sys.mutex.m_mutex, &abstime); + my_cond_timedwait(&cond, &mutex.m_mutex, &abstime); + mysql_mutex_unlock(&mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); mysql_mutex_lock(&mutex); } } @@ -3398,7 +3403,6 @@ next_free_block: { if (last_batch) { - mysql_mutex_assert_not_owner(&log_sys.mutex); if (!empty) my_cond_wait(&cond, &mutex.m_mutex); else @@ -3412,9 +3416,12 @@ next_free_block: } else { - mysql_mutex_unlock(&mutex); + ut_ad(log_sys.latch.is_write_locked()); + log_sys.latch.wr_unlock(); set_timespec_nsec(abstime, 500000000ULL); /* 0.5s */ - my_cond_timedwait(&cond, &log_sys.mutex.m_mutex, &abstime); + my_cond_timedwait(&cond, &mutex.m_mutex, &abstime); + mysql_mutex_unlock(&mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); mysql_mutex_lock(&mutex); } continue; @@ -3432,10 +3439,9 @@ next_free_block: else { mlog_init.reset(); - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); } - mysql_mutex_assert_not_owner(&log_sys.mutex); mysql_mutex_unlock(&mutex); if (last_batch && srv_operation != SRV_OPERATION_RESTORE && @@ -3451,7 +3457,7 @@ next_free_block: if (!last_batch) { buf_pool_invalidate(); - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); } #ifdef HAVE_PMEM else if (log_sys.is_pmem()) @@ -3511,7 +3517,7 @@ 
static bool recv_scan_log(bool last_phase) for (ut_d(lsn_t source_offset= 0);;) { - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); #ifdef UNIV_DEBUG const bool wrap{source_offset + recv_sys.len == log_sys.file_size}; #endif @@ -3868,7 +3874,7 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) static dberr_t recv_rename_files() { mysql_mutex_assert_owner(&recv_sys.mutex); - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); dberr_t err= DB_SUCCESS; @@ -3963,20 +3969,16 @@ dberr_t recv_recovery_from_checkpoint_start() recv_sys.recovery_on = true; - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); dberr_t err = recv_sys.find_checkpoint(); if (err != DB_SUCCESS) { early_exit: - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); return err; } - if (!log_set_capacity(srv_log_file_size)) { -err_exit: - err = DB_ERROR; - goto early_exit; - } + log_sys.set_capacity(); /* Start reading the log from the checkpoint lsn. 
The variable contiguous_lsn contains an lsn up to which the log is known to @@ -4103,7 +4105,9 @@ read_only_recovery: } if (recv_sys.lsn < log_sys.next_checkpoint_lsn) { - goto err_exit; +err_exit: + err = DB_ERROR; + goto early_exit; } if (!srv_read_only_mode && log_sys.is_latest()) { @@ -4142,7 +4146,7 @@ read_only_recovery: err = recv_rename_files(); } mysql_mutex_unlock(&recv_sys.mutex); - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); recv_lsn_checks_on = true; diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 2b5d8946db4..bf17d79cf7a 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -312,16 +312,10 @@ struct DebugCheck { struct ReleaseBlocks { const lsn_t start, end; -#ifdef UNIV_DEBUG - const mtr_buf_t &memo; - - ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t &memo) : - start(start), end(end), memo(memo) -#else /* UNIV_DEBUG */ - ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t&) : - start(start), end(end) -#endif /* UNIV_DEBUG */ + mutable size_t modified; + ReleaseBlocks(lsn_t start, lsn_t end) : start(start), end(end), modified(0) { + ut_ad(!srv_read_only_mode); ut_ad(start); ut_ad(end); } @@ -340,8 +334,25 @@ struct ReleaseBlocks return true; } - buf_flush_note_modification(static_cast(slot->object), - start, end); + modified++; + buf_block_t *b= static_cast(slot->object); + ut_d(const auto s= b->page.state()); + ut_ad(s > buf_page_t::FREED); + ut_ad(s < buf_page_t::READ_FIX); + ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <= end); + mach_write_to_8(b->page.frame + FIL_PAGE_LSN, end); + if (UNIV_LIKELY_NULL(b->page.zip.data)) + memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data, + FIL_PAGE_LSN + b->page.frame, 8); + + const lsn_t oldest_modification= b->page.oldest_modification(); + + if (oldest_modification > 1) + ut_ad(oldest_modification <= start); + else if (fsp_is_system_temporary(b->page.id().space())) + b->page.set_temp_modified(); + else + 
buf_pool.insert_into_flush_list(b, start); return true; } }; @@ -403,15 +414,9 @@ void mtr_t::commit() if (UNIV_LIKELY(m_log_mode == MTR_LOG_ALL)) { - lsns= do_write(); - - if (m_made_dirty) - mysql_mutex_lock(&log_sys.flush_order_mutex); - - /* It is now safe to release log_sys.mutex because the - buf_pool.flush_order_mutex will ensure that we are the first one - to insert into buf_pool.flush_list. */ - mysql_mutex_unlock(&log_sys.mutex); + lsns= do_write(false); + if (!m_made_dirty) + log_sys.latch.rd_unlock(); } else { @@ -420,7 +425,7 @@ void mtr_t::commit() m_commit_lsn= log_sys.get_lsn(); lsns= { m_commit_lsn, PAGE_FLUSH_NO }; if (UNIV_UNLIKELY(m_made_dirty)) /* This should be IMPORT TABLESPACE */ - mysql_mutex_lock(&log_sys.flush_order_mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); } if (m_freed_pages) @@ -445,16 +450,23 @@ void mtr_t::commit() else ut_ad(!m_freed_space); - m_memo.for_each_block_in_reverse(CIterate - (ReleaseBlocks(lsns.first, m_commit_lsn, - m_memo))); + ReleaseBlocks rb{lsns.first, m_commit_lsn}; + m_memo.for_each_block_in_reverse(CIterate(rb)); if (m_made_dirty) - mysql_mutex_unlock(&log_sys.flush_order_mutex); + log_sys.latch.rd_unlock(); m_memo.for_each_block_in_reverse(CIterate()); if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO)) buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC); + + if (rb.modified) + { + mysql_mutex_lock(&buf_pool.flush_list_mutex); + buf_pool.flush_list_requests+= rb.modified; + buf_pool.page_cleaner_wakeup(); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + } } else m_memo.for_each_block_in_reverse(CIterate()); @@ -523,12 +535,13 @@ void mtr_t::commit_shrink(fil_space_t &space) ut_ad(UT_LIST_GET_LEN(space.chain) == 1); log_write_and_flush_prepare(); + log_sys.latch.wr_lock(SRW_LOCK_CALL); - const lsn_t start_lsn= do_write().first; + const lsn_t start_lsn= do_write(true).first; - mysql_mutex_lock(&log_sys.flush_order_mutex); /* Durably write the reduced FSP_SIZE before truncating the data file. 
*/ log_write_and_flush(); + ut_ad(log_sys.latch.is_write_locked()); os_file_truncate(space.chain.start->name, space.chain.start->handle, os_offset_t{space.size} << srv_page_size_shift, true); @@ -557,9 +570,8 @@ void mtr_t::commit_shrink(fil_space_t &space) m_memo.for_each_block_in_reverse(CIterate{space}); m_memo.for_each_block_in_reverse(CIterate - (ReleaseBlocks(start_lsn, m_commit_lsn, - m_memo))); - mysql_mutex_unlock(&log_sys.flush_order_mutex); + (ReleaseBlocks{start_lsn, m_commit_lsn})); + log_sys.latch.wr_unlock(); mysql_mutex_lock(&fil_system.mutex); ut_ad(space.is_being_truncated); @@ -582,7 +594,7 @@ This is to be used at log_checkpoint(). @return current LSN */ lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn) { - mysql_mutex_assert_owner(&log_sys.mutex); + ut_ad(log_sys.latch.is_write_locked()); ut_ad(is_active()); ut_ad(!is_inside_ibuf()); ut_ad(m_log_mode == MTR_LOG_ALL); @@ -616,7 +628,7 @@ lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn) m_crc= 0; m_log.for_each_block([this](const mtr_buf_t::block_t *b) { m_crc= my_crc32c(m_crc, b->begin(), b->used()); return true; }); - finish_write(size); + finish_write(size, true); release_resources(); if (checkpoint_lsn) @@ -753,8 +765,6 @@ mtr_t::release_page(const void* ptr, mtr_memo_type_t type) ut_ad(0); } -static bool log_margin_warned; -static time_t log_margin_warn_time; static bool log_close_warned; static time_t log_close_warn_time; @@ -774,65 +784,73 @@ ATTRIBUTE_COLD static void log_overwrite_warning(lsn_t age, lsn_t capacity) } } -/** Reserve space in the log buffer for appending data. 
-@param size upper limit of the length of the data to append(), in bytes -@return the current LSN */ -inline lsn_t log_t::append_prepare(size_t size) noexcept +/** Wait in append_prepare() for buffer to become available +@param ex whether log_sys.latch is exclusively locked */ +ATTRIBUTE_COLD void log_t::append_prepare_wait(bool ex) noexcept { - mysql_mutex_assert_owner(&mutex); + log_sys.waits++; + log_sys.lsn_lock.unlock(); - lsn_t lsn= get_lsn(); + if (ex) + log_sys.latch.wr_unlock(); + else + log_sys.latch.rd_unlock(); - if (UNIV_UNLIKELY(size > log_capacity)) + DEBUG_SYNC_C("log_buf_size_exceeded"); + log_buffer_flush_to_disk(log_sys.is_pmem()); + + if (ex) + log_sys.latch.wr_lock(SRW_LOCK_CALL); + else + log_sys.latch.rd_lock(SRW_LOCK_CALL); + + log_sys.lsn_lock.lock(); +} + +/** Reserve space in the log buffer for appending data. +@tparam pmem log_sys.is_pmem() +@param size total length of the data to append(), in bytes +@param ex whether log_sys.latch is exclusively locked +@return the start LSN and the buffer position for append() */ +template +inline +std::pair log_t::append_prepare(size_t size, bool ex) noexcept +{ + ut_ad(latch.is_locked()); + ut_ad(pmem == is_pmem()); +#ifndef _WIN32 // there is no accurate is_write_locked() on SRWLOCK + ut_ad(ex == latch.is_write_locked()); +#endif + const lsn_t checkpoint_margin{last_checkpoint_lsn + log_capacity - size}; + const size_t avail{(pmem ? size_t(capacity()) : buf_size) - size}; + lsn_lock.lock(); + write_to_buf++; + + for (ut_d(int count= 50); + UNIV_UNLIKELY((pmem + ? 
size_t(get_lsn() - + get_flushed_lsn(std::memory_order_relaxed)) + : size_t{buf_free}) > avail); ) { - time_t t= time(nullptr); - - /* return with warning output to avoid deadlock */ - if (!log_margin_warned || difftime(t, log_margin_warn_time) > 15) - { - log_margin_warned= true; - log_margin_warn_time= t; - - sql_print_error("InnoDB: innodb_log_file_size is too small " - "for mini-transaction size %zu", size); - } - goto throttle; + append_prepare_wait(ex); + ut_ad(count--); } - else if (UNIV_UNLIKELY(lsn + size > last_checkpoint_lsn + log_capacity)) - throttle: + + const lsn_t l{lsn.load(std::memory_order_relaxed)}; + lsn.store(l + size, std::memory_order_relaxed); + const size_t b{buf_free}; + size_t new_buf_free{b}; + new_buf_free+= size; + if (pmem && new_buf_free >= file_size) + new_buf_free-= size_t(capacity()); + buf_free= new_buf_free; + lsn_lock.unlock(); + + if (UNIV_UNLIKELY(l > checkpoint_margin) || + (!pmem && b >= max_buf_free)) set_check_flush_or_checkpoint(); - if (is_pmem()) - { - for (ut_d(int count= 50); capacity() - size < - size_t(lsn - flushed_to_disk_lsn.load(std::memory_order_relaxed)); ) - { - waits++; - mysql_mutex_unlock(&mutex); - DEBUG_SYNC_C("log_buf_size_exceeded"); - log_write_up_to(lsn, true); - ut_ad(count--); - mysql_mutex_lock(&mutex); - lsn= get_lsn(); - } - return lsn; - } - - /* Calculate the amount of free space needed. */ - size= (4 * 4096) - size + log_sys.buf_size; - - for (ut_d(int count= 50); UNIV_UNLIKELY(buf_free > size); ) - { - waits++; - mysql_mutex_unlock(&mutex); - DEBUG_SYNC_C("log_buf_size_exceeded"); - log_write_up_to(lsn, false); - ut_ad(count--); - mysql_mutex_lock(&mutex); - lsn= get_lsn(); - } - - return lsn; + return {l, &buf[b]}; } /** Finish appending data to the log. 
@@ -840,9 +858,7 @@ inline lsn_t log_t::append_prepare(size_t size) noexcept @return whether buf_flush_ahead() will have to be invoked */ static mtr_t::page_flush_ahead log_close(lsn_t lsn) noexcept { - mysql_mutex_assert_owner(&log_sys.mutex); - log_sys.write_to_buf++; - log_sys.set_lsn(lsn); + ut_ad(log_sys.latch.is_locked()); const lsn_t checkpoint_age= lsn - log_sys.last_checkpoint_lsn; @@ -859,10 +875,11 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn) noexcept return mtr_t::PAGE_FLUSH_SYNC; } -std::pair mtr_t::do_write() +std::pair mtr_t::do_write(bool ex) { ut_ad(!recv_no_log_write); ut_ad(m_log_mode == MTR_LOG_ALL); + ut_ad(!ex || log_sys.latch.is_write_locked()); size_t len= m_log.size() + 5; ut_ad(len > 5); @@ -879,86 +896,93 @@ std::pair mtr_t::do_write() { m_crc= my_crc32c(m_crc, b->begin(), b->used()); return true; }); } - mysql_mutex_lock(&log_sys.mutex); + if (!ex) + log_sys.latch.rd_lock(SRW_LOCK_CALL); - if (m_user_space && !is_predefined_tablespace(m_user_space->id) && - !m_user_space->max_lsn) - name_write(); + if (UNIV_UNLIKELY(m_user_space && !m_user_space->max_lsn && + !is_predefined_tablespace(m_user_space->id))) + { + if (!ex) + { + log_sys.latch.rd_unlock(); + log_sys.latch.wr_lock(SRW_LOCK_CALL); + if (UNIV_LIKELY(!m_user_space->max_lsn)) + name_write(); + std::pair p{finish_write(len, true)}; + log_sys.latch.wr_unlock(); + log_sys.latch.rd_lock(SRW_LOCK_CALL); + return p; + } + else + name_write(); + } - return finish_write(len); + return finish_write(len, ex); } /** Write the mini-transaction log to the redo log buffer. +@param len number of bytes to write +@param ex whether log_sys.latch is exclusively locked @return {start_lsn,flush_ahead} */ -std::pair mtr_t::finish_write(size_t len) +std::pair +mtr_t::finish_write(size_t len, bool ex) { ut_ad(!recv_no_log_write); ut_ad(m_log_mode == MTR_LOG_ALL); - const lsn_t start_lsn= log_sys.append_prepare(len); const size_t size{m_commit_lsn ? 
5U + 8U : 5U}; + std::pair start; if (!log_sys.is_pmem()) { - m_log.for_each_block([](const mtr_buf_t::block_t *b) - { log_sys.append(b->begin(), b->used()); return true; }); - - if (log_sys.buf_free >= log_sys.max_buf_free) - log_sys.set_check_flush_or_checkpoint(); + start= log_sys.append_prepare(len, ex); + m_log.for_each_block([&start](const mtr_buf_t::block_t *b) + { log_sys.append(start.second, b->begin(), b->used()); return true; }); #ifdef HAVE_PMEM write_trailer: #endif - log_sys.buf[log_sys.buf_free]= - log_sys.get_sequence_bit(start_lsn + len - size); + *start.second++= log_sys.get_sequence_bit(start.first + len - size); if (m_commit_lsn) { - byte *nonce= log_sys.buf + log_sys.buf_free + 1; - mach_write_to_8(nonce, m_commit_lsn); - m_crc= my_crc32c(m_crc, nonce, 8); - mach_write_to_4(&log_sys.buf[log_sys.buf_free + 9], m_crc); - log_sys.buf_free+= 8 + 5; - } - else - { - mach_write_to_4(&log_sys.buf[log_sys.buf_free + 1], m_crc); - log_sys.buf_free+= 5; + mach_write_to_8(start.second, m_commit_lsn); + m_crc= my_crc32c(m_crc, start.second, 8); + start.second+= 8; } + mach_write_to_4(start.second, m_crc); } #ifdef HAVE_PMEM - else if (UNIV_LIKELY(log_sys.buf_free + len < log_sys.file_size)) - { - m_log.for_each_block([](const mtr_buf_t::block_t *b) - { log_sys.append(b->begin(), b->used()); return true; }); - goto write_trailer; - } else { - m_log.for_each_block([](const mtr_buf_t::block_t *b) + start= log_sys.append_prepare(len, ex); + if (UNIV_LIKELY(start.second + len <= &log_sys.buf[log_sys.file_size])) + { + m_log.for_each_block([&start](const mtr_buf_t::block_t *b) + { log_sys.append(start.second, b->begin(), b->used()); return true; }); + goto write_trailer; + } + m_log.for_each_block([&start](const mtr_buf_t::block_t *b) { size_t size{b->used()}; - const size_t size_left{log_sys.file_size - log_sys.buf_free}; + const size_t size_left(&log_sys.buf[log_sys.file_size] - start.second); const byte *src= b->begin(); - if (size <= size_left) - { - 
::memcpy(log_sys.buf + log_sys.buf_free, src, size); - log_sys.buf_free+= size; - } - else + if (size > size_left) { + ::memcpy(start.second, src, size_left); + start.second= &log_sys.buf[log_sys.START_OFFSET]; + src+= size_left; size-= size_left; - ::memcpy(log_sys.buf + log_sys.buf_free, src, size_left); - ::memcpy(log_sys.buf + log_sys.START_OFFSET, src + size_left, size); - log_sys.buf_free= log_sys.START_OFFSET + size; } + ::memcpy(start.second, src, size); + start.second+= size; return true; }); - const size_t size_left{log_sys.file_size - log_sys.buf_free}; + const size_t size_left(&log_sys.buf[log_sys.file_size] - start.second); if (size_left > size) goto write_trailer; byte tail[5 + 8]; - tail[0]= log_sys.get_sequence_bit(start_lsn + len - size); + tail[0]= log_sys.get_sequence_bit(start.first + len - size); if (m_commit_lsn) { @@ -969,15 +993,14 @@ std::pair mtr_t::finish_write(size_t len) else mach_write_to_4(tail + 1, m_crc); - ::memcpy(log_sys.buf + log_sys.buf_free, tail, size_left); + ::memcpy(start.second, tail, size_left); ::memcpy(log_sys.buf + log_sys.START_OFFSET, tail + size_left, size - size_left); - log_sys.buf_free= log_sys.START_OFFSET + (size - size_left); } #endif - m_commit_lsn= start_lsn + len; - return {start_lsn, log_close(m_commit_lsn)}; + m_commit_lsn= start.first + len; + return {start.first, log_close(m_commit_lsn)}; } /** Find out whether a block was not X-latched by the mini-transaction */ diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 90d71556a5b..520ff7adff5 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -1371,6 +1371,7 @@ corresponding monitors are turned on/off/reset, and do appropriate mathematics to deduct the actual value. 
Please also refer to srv_export_innodb_status() for related global counters used by the existing status variables.*/ +TPOOL_SUPPRESS_TSAN void srv_mon_process_existing_counter( /*=============================*/ @@ -1405,7 +1406,7 @@ srv_mon_process_existing_counter( /* innodb_buffer_pool_write_requests, the number of write request */ case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST: - value = srv_stats.buf_pool_write_requests; + value = buf_pool.flush_list_requests; break; /* innodb_buffer_pool_wait_free */ @@ -1714,10 +1715,10 @@ srv_mon_process_existing_counter( break; case MONITOR_LSN_CHECKPOINT_AGE: - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); value = static_cast(log_sys.get_lsn() - log_sys.last_checkpoint_lsn); - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); break; case MONITOR_OVLD_BUF_OLDEST_LSN: diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index c3c4c05c6bf..29a6316fae9 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1024,9 +1024,6 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_read_requests = buf_pool.stat.n_page_gets; - export_vars.innodb_buffer_pool_write_requests = - srv_stats.buf_pool_write_requests; - export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads; export_vars.innodb_buffer_pool_read_ahead_rnd = @@ -1167,13 +1164,13 @@ srv_export_innodb_status(void) mysql_mutex_unlock(&srv_innodb_monitor_mutex); - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); export_vars.innodb_lsn_current = log_sys.get_lsn(); export_vars.innodb_lsn_flushed = log_sys.get_flushed_lsn(); export_vars.innodb_lsn_last_checkpoint = log_sys.last_checkpoint_lsn; export_vars.innodb_checkpoint_max_age = static_cast( log_sys.max_checkpoint_age); - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); export_vars.innodb_os_log_written = export_vars.innodb_lsn_current - recv_sys.lsn; @@ -1818,10 +1815,10 @@ void 
purge_coordinator_state::refresh(bool full) lsn_hwm= adaptive_purge_threshold + series[n_threads]; } - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.rd_lock(SRW_LOCK_CALL); const lsn_t last= log_sys.last_checkpoint_lsn, max_age= log_sys.max_checkpoint_age; - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.rd_unlock(); lsn_age_factor= ulint(((log_sys.get_lsn() - last) * 100) / max_age); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index ba2628ae04c..2166bb51ab8 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -195,12 +195,8 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn, DBUG_ASSERT(!buf_pool.any_io_pending()); - mysql_mutex_lock(&log_sys.mutex); - if (!log_set_capacity(srv_log_file_size)) { -err_exit: - mysql_mutex_unlock(&log_sys.mutex); - return DB_ERROR; - } + log_sys.latch.wr_lock(SRW_LOCK_CALL); + log_sys.set_capacity(); logfile0 = get_log_file_path(LOG_FILE_NAME_PREFIX) .append(INIT_LOG_FILE0); @@ -213,7 +209,9 @@ err_exit: if (!ret) { sql_print_error("InnoDB: Cannot create %s", logfile0.c_str()); - goto err_exit; +err_exit: + log_sys.latch.wr_unlock(); + return DB_ERROR; } ret = os_file_set_size(logfile0.c_str(), file, srv_log_file_size); @@ -244,7 +242,7 @@ err_exit: /* Enable checkpoints in buf_flush_page_cleaner(). 
*/ recv_sys.recovery_on = false; - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); log_make_checkpoint(); log_buffer_flush_to_disk(); @@ -801,7 +799,7 @@ static lsn_t srv_prepare_to_delete_redo_log_file() DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); - mysql_mutex_lock(&log_sys.mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); const bool latest_format{log_sys.is_latest()}; lsn_t flushed_lsn{log_sys.get_lsn()}; @@ -846,7 +844,7 @@ same_size: } } - mysql_mutex_unlock(&log_sys.mutex); + log_sys.latch.wr_unlock(); log_write_up_to(flushed_lsn, false); diff --git a/storage/innobase/sync/srw_lock.cc b/storage/innobase/sync/srw_lock.cc index 71414e8ddb2..bc3ef0a7b4e 100644 --- a/storage/innobase/sync/srw_lock.cc +++ b/storage/innobase/sync/srw_lock.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2020, 2021, MariaDB Corporation. +Copyright (c) 2020, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -102,6 +102,13 @@ static inline void srw_pause(unsigned delay) HMT_medium(); } +void sspin_lock::lock() noexcept +{ + while (word.exchange(true, std::memory_order_acquire)) + while (word.load(std::memory_order_relaxed)) + srw_pause(1); +} + #ifdef SUX_LOCK_GENERIC template<> void srw_mutex_impl::wr_wait() {