From ee773753778e741c1aff1449fcd41b775701a4db Mon Sep 17 00:00:00 2001 From: Anel Husakovic Date: Wed, 7 Dec 2022 14:14:24 +0100 Subject: [PATCH 01/24] MDEV-26875: Wrong user in SET DEFAULT ROLE error - Regression introduced in 957cb7b7ba35 - Patch 4abb8216a054 changed `mysql.user` to `mysql.global_priv` for `add_anonymous.inc`; update `delete_anonymous.inc` accordingly. - Added test case with `--skip-name-resolve` - Add test case with anonymous user - Disable this test for windows, since it assigns the current user to the anonymous user. Reviewed by: --- mysql-test/include/delete_anonymous_users.inc | 2 +- .../suite/roles/set_default_role_for.result | 2 +- .../roles/set_default_role_invalid.result | 6 +- .../suite/roles/set_default_role_invalid.test | 1 - ..._role_invalid_skip_name_resolve-master.opt | 1 + ...ault_role_invalid_skip_name_resolve.result | 85 +++++++++++++++++++ ...efault_role_invalid_skip_name_resolve.test | 78 +++++++++++++++++ sql/sql_acl.cc | 16 +++- 8 files changed, 181 insertions(+), 10 deletions(-) create mode 100644 mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve-master.opt create mode 100644 mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.result create mode 100644 mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.test diff --git a/mysql-test/include/delete_anonymous_users.inc b/mysql-test/include/delete_anonymous_users.inc index 704e74ae4a3..cc44a01fe8d 100644 --- a/mysql-test/include/delete_anonymous_users.inc +++ b/mysql-test/include/delete_anonymous_users.inc @@ -1,7 +1,7 @@ # Remove anonymous users added by add_anonymous_users.inc disable_warnings; disable_query_log; -DELETE FROM mysql.user where host='localhost' and user=''; +DELETE FROM mysql.global_priv where host='localhost' and user=''; FLUSH PRIVILEGES; enable_query_log; enable_warnings; diff --git a/mysql-test/suite/roles/set_default_role_for.result b/mysql-test/suite/roles/set_default_role_for.result index 57a1471126c..1b133b1baae 100644 --- 
a/mysql-test/suite/roles/set_default_role_for.result +++ b/mysql-test/suite/roles/set_default_role_for.result @@ -14,7 +14,7 @@ set default role role_a for user_a@localhost; set default role invalid_role for user_a@localhost; ERROR OP000: Invalid role specification `invalid_role` set default role role_b for user_a@localhost; -ERROR OP000: User `root`@`localhost` has not been granted role `role_b` +ERROR OP000: User `user_a`@`localhost` has not been granted role `role_b` set default role role_b for user_b@localhost; show grants; Grants for user_a@localhost diff --git a/mysql-test/suite/roles/set_default_role_invalid.result b/mysql-test/suite/roles/set_default_role_invalid.result index 12e2c035a7d..2cd84cf2ff0 100644 --- a/mysql-test/suite/roles/set_default_role_invalid.result +++ b/mysql-test/suite/roles/set_default_role_invalid.result @@ -48,7 +48,7 @@ CREATE USER b; CREATE ROLE r1; CREATE ROLE r2; SET DEFAULT ROLE r1 FOR a; -ERROR OP000: User `root`@`localhost` has not been granted role `r1` +ERROR OP000: User `a`@`%` has not been granted role `r1` GRANT r1 TO b; GRANT r2 TO b; SET DEFAULT ROLE r1 FOR b; @@ -100,7 +100,7 @@ GRANT USAGE ON *.* TO `b`@`%` GRANT SELECT, UPDATE ON `mysql`.* TO `b`@`%` SET DEFAULT ROLE `r2` FOR `b`@`%` SET DEFAULT ROLE r1 FOR a; -ERROR OP000: User `b`@`%` has not been granted role `r1` +ERROR OP000: User `a`@`%` has not been granted role `r1` SET DEFAULT ROLE invalid_role; ERROR OP000: Invalid role specification `invalid_role` SET DEFAULT ROLE invalid_role FOR a; @@ -117,7 +117,7 @@ SET DEFAULT ROLE None; # Change user b (session 3: role granted to user a) SET DEFAULT ROLE r1 FOR a; SET DEFAULT ROLE r2 FOR a; -ERROR OP000: User `b`@`%` has not been granted role `r2` +ERROR OP000: User `a`@`%` has not been granted role `r2` SET DEFAULT ROLE invalid_role; ERROR OP000: Invalid role specification `invalid_role` SET DEFAULT ROLE invalid_role FOR a; diff --git a/mysql-test/suite/roles/set_default_role_invalid.test 
b/mysql-test/suite/roles/set_default_role_invalid.test index 02fca1107e2..d2ef01b8f88 100644 --- a/mysql-test/suite/roles/set_default_role_invalid.test +++ b/mysql-test/suite/roles/set_default_role_invalid.test @@ -70,7 +70,6 @@ CREATE USER a; CREATE USER b; CREATE ROLE r1; CREATE ROLE r2; -# Role has not been granted to user a, but the role is visible to current_user --error ER_INVALID_ROLE SET DEFAULT ROLE r1 FOR a; # Granting roles to user b diff --git a/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve-master.opt b/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve-master.opt new file mode 100644 index 00000000000..ec008a812ce --- /dev/null +++ b/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve-master.opt @@ -0,0 +1 @@ +--skip-name-resolve \ No newline at end of file diff --git a/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.result b/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.result new file mode 100644 index 00000000000..a267e114012 --- /dev/null +++ b/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.result @@ -0,0 +1,85 @@ +# +# MDEV-26875: Wrong user in SET DEFAULT ROLE error +# +create user test_user; +create role test_role; +show grants for test_user; +Grants for test_user@% +GRANT USAGE ON *.* TO `test_user`@`%` +set default role test_role for test_user; +ERROR OP000: User `test_user`@`%` has not been granted role `test_role` +grant test_role to test_user; +set default role test_role for test_user; +show grants for test_user; +Grants for test_user@% +GRANT `test_role` TO `test_user`@`%` +GRANT USAGE ON *.* TO `test_user`@`%` +SET DEFAULT ROLE `test_role` FOR `test_user`@`%` +set default role none for test_user; +# +# Try to set default role to role(`test_role`). 
+-------------------------------------------------------------- +show grants for test_role; +Grants for test_role +GRANT USAGE ON *.* TO `test_role` +create role new_role; +grant new_role to test_role; +show grants for test_role; +Grants for test_role +GRANT `new_role` TO `test_role` +GRANT USAGE ON *.* TO `test_role` +GRANT USAGE ON *.* TO `new_role` +set default role new_role for test_role; +ERROR OP000: User `test_role`@`%` has not been granted role `new_role` +# +# Test of errors, where hostname cannot be resolved `test_user` +-------------------------------------------------------------- +grant test_role to test_user@'%'; +set default role test_role for test_user@'%'; +connect con_test_user,127.0.0.1,test_user,,,$MASTER_MYPORT; +show grants; +Grants for test_user@% +GRANT `test_role` TO `test_user`@`%` +GRANT USAGE ON *.* TO `test_user`@`%` +GRANT `new_role` TO `test_role` +GRANT USAGE ON *.* TO `test_role` +GRANT USAGE ON *.* TO `new_role` +SET DEFAULT ROLE `test_role` FOR `test_user`@`%` +select current_role; +current_role +test_role +set role `new_role`; +ERROR OP000: User `test_user`@`%` has not been granted role `new_role` +connection default; +set default role none for test_user; +disconnect con_test_user; +connect con_test_user,127.0.0.1,test_user,,,$MASTER_MYPORT; +select current_role; +current_role +NULL +set role `new_role`; +ERROR OP000: User `test_user`@`%` has not been granted role `new_role` +connection default; +disconnect con_test_user; +# +# Test of anonymous user connection +-------------------------------------------------------------- +grant test_role to ''@localhost; +connect con1,localhost,'',,,$MASTER_MYPORT; +SELECT CURRENT_ROLE; +CURRENT_ROLE +NULL +SET role test_role; +SELECT CURRENT_ROLE; +CURRENT_ROLE +test_role +SET role new_role; +ERROR OP000: User ``@`localhost` has not been granted role `new_role` +set default role test_role for ''@localhost; +ERROR 42000: You are using MariaDB as an anonymous user and anonymous users are not 
allowed to modify user settings +connection default; +disconnect con1; +REVOKE all privileges, grant option from ''@localhost; +drop role new_role; +drop role test_role; +drop user test_user; diff --git a/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.test b/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.test new file mode 100644 index 00000000000..5b4b14d3377 --- /dev/null +++ b/mysql-test/suite/roles/set_default_role_invalid_skip_name_resolve.test @@ -0,0 +1,78 @@ +source include/not_embedded.inc; + +--echo # +--echo # MDEV-26875: Wrong user in SET DEFAULT ROLE error +--echo # +create user test_user; +create role test_role; +show grants for test_user; +--error ER_INVALID_ROLE +set default role test_role for test_user; +grant test_role to test_user; +set default role test_role for test_user; +show grants for test_user; +set default role none for test_user; + +--echo # +--echo # Try to set default role to role(`test_role`). +--echo -------------------------------------------------------------- +show grants for test_role; +create role new_role; +grant new_role to test_role; +show grants for test_role; +# One can not set role to a role +--error ER_INVALID_ROLE +set default role new_role for test_role; + +--echo # +--echo # Test of errors, where hostname cannot be resolved `test_user` +--echo -------------------------------------------------------------- +# `new_role` is granted to `test_role` +grant test_role to test_user@'%'; +set default role test_role for test_user@'%'; + +connect con_test_user,127.0.0.1,test_user,,,$MASTER_MYPORT; +show grants; +select current_role; +# `test_user` indirectly granted `new_role` +--error ER_INVALID_ROLE +set role `new_role`; + +connection default; +set default role none for test_user; +disconnect con_test_user; + +connect con_test_user,127.0.0.1,test_user,,,$MASTER_MYPORT; +select current_role; +--error ER_INVALID_ROLE +set role `new_role`; + +connection default; +disconnect con_test_user; + 
+--echo # +--echo # Test of anonymous user connection +--echo -------------------------------------------------------------- +--source include/add_anonymous_users.inc +# Skip windows, since it uses current user `Administrator` in buildbot. +--source include/not_windows.inc +grant test_role to ''@localhost; + +connect(con1,localhost,'',,,$MASTER_MYPORT); +SELECT CURRENT_ROLE; +SET role test_role; +SELECT CURRENT_ROLE; +# user cannot set subset role, since it is not granted explicitly +--error ER_INVALID_ROLE +SET role new_role; +--error ER_PASSWORD_ANONYMOUS_USER +set default role test_role for ''@localhost; + +connection default; +disconnect con1; +REVOKE all privileges, grant option from ''@localhost; +--source include/delete_anonymous_users.inc + +drop role new_role; +drop role test_role; +drop user test_user; diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 5468c19f531..3735d9e965d 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -3339,10 +3339,18 @@ end: check_role_is_granted_callback, NULL) == -1)) { - /* Role is not granted but current user can see the role */ - my_printf_error(ER_INVALID_ROLE, "User %`s@%`s has not been granted role %`s", - MYF(0), thd->security_ctx->priv_user, - thd->security_ctx->priv_host, rolename); + /* This happens for SET ROLE case and when `--skip-name-resolve` option + is used. In that situation host can be NULL and current user is always + target user, so printing `priv_user@priv_host` is not incorrect. 
+ */ + if (!host) + my_printf_error(ER_INVALID_ROLE, "User %`s@%`s has not been granted role %`s", + MYF(0), thd->security_ctx->priv_user, + thd->security_ctx->priv_host, rolename); + else + /* Role is not granted but current user can see the role */ + my_printf_error(ER_INVALID_ROLE, "User %`s@%`s has not been granted role %`s", + MYF(0), user, host, rolename); } else { From 4eb8aeee8e15f0267b19487bc0953f27136f6737 Mon Sep 17 00:00:00 2001 From: Anel Husakovic Date: Tue, 17 Oct 2023 15:41:52 +0200 Subject: [PATCH 02/24] MDEV-32462: mysql_upgrade -s still checks for non system tables - Prevent opening of any user tables in case `upgrade-system-table` option is used. - Still there may be uninstalled data types in `mysql` system table so allow it to perform. - Closes PR #2790 - Reviewer: , --- client/mysql_upgrade.c | 2 + ...ql_json_mysql_upgrade_system_tables.result | 93 +++++++++++++++++++ ...ysql_json_mysql_upgrade_system_tables.test | 52 +++++++++++ 3 files changed, 147 insertions(+) create mode 100644 mysql-test/main/mysql_json_mysql_upgrade_system_tables.result create mode 100644 mysql-test/main/mysql_json_mysql_upgrade_system_tables.test diff --git a/client/mysql_upgrade.c b/client/mysql_upgrade.c index 60eb61938d4..617d5464e34 100644 --- a/client/mysql_upgrade.c +++ b/client/mysql_upgrade.c @@ -1155,6 +1155,8 @@ static int install_used_plugin_data_types(void) DYNAMIC_STRING ds_result; const char *query = "SELECT table_comment FROM information_schema.tables" " WHERE table_comment LIKE 'Unknown data type: %'"; + if (opt_systables_only) + return 0; if (init_dynamic_string(&ds_result, "", 512, 512)) die("Out of memory"); run_query(query, &ds_result, TRUE); diff --git a/mysql-test/main/mysql_json_mysql_upgrade_system_tables.result b/mysql-test/main/mysql_json_mysql_upgrade_system_tables.result new file mode 100644 index 00000000000..95ca68f43a6 --- /dev/null +++ b/mysql-test/main/mysql_json_mysql_upgrade_system_tables.result @@ -0,0 +1,93 @@ +# +# MDEV-32462: 
mysql_upgrade -s still checks for non system tables +# +call mtr.add_suppression("Table rebuild required"); +SET NAMES utf8; +# mariadb_upgrade on system and user table +show tables from mysql like '%json%'; +Tables_in_mysql (%json%) +mysql_json_test +use mysql; +show create table mysql.mysql_json_test; +ERROR HY000: Unknown data type: 'MYSQL_JSON' +show create table test.mysql_json_test; +ERROR HY000: Unknown data type: 'MYSQL_JSON' +SET @old_general_log= @@global.general_log; +SET @old_log_output= @@global.log_output; +SET @@global.general_log = ON; +SET @@global.log_output = "TABLE"; +The --upgrade-system-tables option was used, user tables won't be touched. +Phase 1/7: Checking and upgrading mysql database +Processing databases +mysql +mysql.column_stats OK +mysql.columns_priv OK +mysql.db OK +mysql.event OK +mysql.func OK +mysql.global_priv OK +mysql.gtid_slave_pos OK +mysql.help_category OK +mysql.help_keyword OK +mysql.help_relation OK +mysql.help_topic OK +mysql.index_stats OK +mysql.innodb_index_stats +Error : Unknown storage engine 'InnoDB' +error : Corrupt +mysql.innodb_table_stats +Error : Unknown storage engine 'InnoDB' +error : Corrupt +mysql.mysql_json_test +Error : Unknown data type: 'MYSQL_JSON' +error : Corrupt +mysql.plugin OK +mysql.proc OK +mysql.procs_priv OK +mysql.proxies_priv OK +mysql.roles_mapping OK +mysql.servers OK +mysql.table_stats OK +mysql.tables_priv OK +mysql.time_zone OK +mysql.time_zone_leap_second OK +mysql.time_zone_name OK +mysql.time_zone_transition OK +mysql.time_zone_transition_type OK +mysql.transaction_registry +Error : Unknown storage engine 'InnoDB' +error : Corrupt + +Repairing tables +mysql.innodb_index_stats +Error : Unknown storage engine 'InnoDB' +error : Corrupt +mysql.innodb_table_stats +Error : Unknown storage engine 'InnoDB' +error : Corrupt +mysql.mysql_json_test +Error : Unknown data type: 'MYSQL_JSON' +error : Corrupt +mysql.transaction_registry +Error : Unknown storage engine 'InnoDB' +error : Corrupt 
+Phase 2/7: Installing used storage engines... Skipped +Phase 3/7: Fixing views... Skipped +Phase 4/7: Running 'mysql_fix_privilege_tables' +Phase 5/7: Fixing table and database names ... Skipped +Phase 6/7: Checking and upgrading tables... Skipped +Phase 7/7: Running 'FLUSH PRIVILEGES' +OK +SET @@global.general_log = @old_general_log; +SET @@global.log_output = @old_log_output; +select command_type, argument from mysql.general_log where argument like "%SELECT table_comment FROM information_schema.tables%"; +command_type argument +show create table mysql.mysql_json_test; +ERROR HY000: Unknown data type: 'MYSQL_JSON' +show create table test.mysql_json_test; +ERROR HY000: Unknown data type: 'MYSQL_JSON' +drop table mysql.mysql_json_test; +drop table test.mysql_json_test; +# +# End of 10.5 tests +# diff --git a/mysql-test/main/mysql_json_mysql_upgrade_system_tables.test b/mysql-test/main/mysql_json_mysql_upgrade_system_tables.test new file mode 100644 index 00000000000..6ae3e8ddea1 --- /dev/null +++ b/mysql-test/main/mysql_json_mysql_upgrade_system_tables.test @@ -0,0 +1,52 @@ +--echo # +--echo # MDEV-32462: mysql_upgrade -s still checks for non system tables +--echo # + +# Let's now load plugin first +--source include/have_utf8.inc +--source include/not_embedded.inc + +--source include/mysql_upgrade_preparation.inc +call mtr.add_suppression("Table rebuild required"); + +SET NAMES utf8; + +let $MYSQLD_DATADIR= `select @@datadir`; + +--echo # mariadb_upgrade on system and user table +--copy_file std_data/mysql_json/mysql_json_test.frm $MYSQLD_DATADIR/mysql/mysql_json_test.frm +--copy_file std_data/mysql_json/mysql_json_test.MYI $MYSQLD_DATADIR/mysql/mysql_json_test.MYI +--copy_file std_data/mysql_json/mysql_json_test.MYD $MYSQLD_DATADIR/mysql/mysql_json_test.MYD +--copy_file std_data/mysql_json/mysql_json_test.frm $MYSQLD_DATADIR/test/mysql_json_test.frm +--copy_file std_data/mysql_json/mysql_json_test.MYI $MYSQLD_DATADIR/test/mysql_json_test.MYI +--copy_file 
std_data/mysql_json/mysql_json_test.MYD $MYSQLD_DATADIR/test/mysql_json_test.MYD + +show tables from mysql like '%json%'; +use mysql; +--error ER_UNKNOWN_DATA_TYPE +show create table mysql.mysql_json_test; +--error ER_UNKNOWN_DATA_TYPE +show create table test.mysql_json_test; + +SET @old_general_log= @@global.general_log; +SET @old_log_output= @@global.log_output; +SET @@global.general_log = ON; +SET @@global.log_output = "TABLE"; +--exec $MYSQL_UPGRADE -s --force 2>&1 +--remove_file $MYSQLD_DATADIR/mysql_upgrade_info +SET @@global.general_log = @old_general_log; +SET @@global.log_output = @old_log_output; + +select command_type, argument from mysql.general_log where argument like "%SELECT table_comment FROM information_schema.tables%"; + +# User table is not upgraded in `mysql\test` DB, so we cannot see it. +--error ER_UNKNOWN_DATA_TYPE +show create table mysql.mysql_json_test; +--error ER_UNKNOWN_DATA_TYPE +show create table test.mysql_json_test; +drop table mysql.mysql_json_test; +drop table test.mysql_json_test; + +--echo # +--echo # End of 10.5 tests +--echo # From b06ac9a8cd2146e89270cc2150d306d8ed1b33fb Mon Sep 17 00:00:00 2001 From: Anel Husakovic Date: Thu, 19 Oct 2023 09:21:18 +0200 Subject: [PATCH 03/24] MDEV-32462: mysql_upgrade -s still checks for non system tables - Rename files as requested by Vicentiu: ``` mysql_json_mysql_upgrade.test -> mysql_upgrade_mysql_json.test mysql_json_mysql_upgrade_with_plugin_loaded.test -> mysql_upgrade_mysql_json_with_plugin_loaded.test mysql_json_mysql_upgrade_system_tables.test -> mysql_upgrade_mysql_json_system_tables.test ``` - Related to PR #2790 - Reviewer: , --- ..._json_mysql_upgrade.result => mysql_upgrade_mysql_json.result} | 0 ...ysql_json_mysql_upgrade.test => mysql_upgrade_mysql_json.test} | 0 ...ables.result => mysql_upgrade_mysql_json_system_tables.result} | 0 ...em_tables.test => mysql_upgrade_mysql_json_system_tables.test} | 0 ....result => mysql_upgrade_mysql_json_with_plugin_loaded.result} | 0 
...aded.test => mysql_upgrade_mysql_json_with_plugin_loaded.test} | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename mysql-test/main/{mysql_json_mysql_upgrade.result => mysql_upgrade_mysql_json.result} (100%) rename mysql-test/main/{mysql_json_mysql_upgrade.test => mysql_upgrade_mysql_json.test} (100%) rename mysql-test/main/{mysql_json_mysql_upgrade_system_tables.result => mysql_upgrade_mysql_json_system_tables.result} (100%) rename mysql-test/main/{mysql_json_mysql_upgrade_system_tables.test => mysql_upgrade_mysql_json_system_tables.test} (100%) rename mysql-test/main/{mysql_json_mysql_upgrade_with_plugin_loaded.result => mysql_upgrade_mysql_json_with_plugin_loaded.result} (100%) rename mysql-test/main/{mysql_json_mysql_upgrade_with_plugin_loaded.test => mysql_upgrade_mysql_json_with_plugin_loaded.test} (100%) diff --git a/mysql-test/main/mysql_json_mysql_upgrade.result b/mysql-test/main/mysql_upgrade_mysql_json.result similarity index 100% rename from mysql-test/main/mysql_json_mysql_upgrade.result rename to mysql-test/main/mysql_upgrade_mysql_json.result diff --git a/mysql-test/main/mysql_json_mysql_upgrade.test b/mysql-test/main/mysql_upgrade_mysql_json.test similarity index 100% rename from mysql-test/main/mysql_json_mysql_upgrade.test rename to mysql-test/main/mysql_upgrade_mysql_json.test diff --git a/mysql-test/main/mysql_json_mysql_upgrade_system_tables.result b/mysql-test/main/mysql_upgrade_mysql_json_system_tables.result similarity index 100% rename from mysql-test/main/mysql_json_mysql_upgrade_system_tables.result rename to mysql-test/main/mysql_upgrade_mysql_json_system_tables.result diff --git a/mysql-test/main/mysql_json_mysql_upgrade_system_tables.test b/mysql-test/main/mysql_upgrade_mysql_json_system_tables.test similarity index 100% rename from mysql-test/main/mysql_json_mysql_upgrade_system_tables.test rename to mysql-test/main/mysql_upgrade_mysql_json_system_tables.test diff --git 
a/mysql-test/main/mysql_json_mysql_upgrade_with_plugin_loaded.result b/mysql-test/main/mysql_upgrade_mysql_json_with_plugin_loaded.result similarity index 100% rename from mysql-test/main/mysql_json_mysql_upgrade_with_plugin_loaded.result rename to mysql-test/main/mysql_upgrade_mysql_json_with_plugin_loaded.result diff --git a/mysql-test/main/mysql_json_mysql_upgrade_with_plugin_loaded.test b/mysql-test/main/mysql_upgrade_mysql_json_with_plugin_loaded.test similarity index 100% rename from mysql-test/main/mysql_json_mysql_upgrade_with_plugin_loaded.test rename to mysql-test/main/mysql_upgrade_mysql_json_with_plugin_loaded.test From 3c1f324a5560d106f4918eda2d42eb626ab9ed57 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Fri, 3 Nov 2023 14:43:45 +0100 Subject: [PATCH 04/24] MDEV-32672: Don't hold LOCK_thd_data over commit_ordered Partial revert of this commit: commit 6b685ea7b0776430d45b095cb4be3ef0739a3c04 Author: Sergei Golubchik Date: Wed Sep 28 18:55:15 2022 +0200 Don't hold LOCK_thd_data over run_commit_ordered(). Holding the mutex is unnecessary and will deadlock if any code in a commit_ordered handlerton call tries to take the mutex to change THD local data. Instead, set the current_thd for the duration of the call to keep asserts happy around LOCK_thd_data. 
Signed-off-by: Kristian Nielsen --- sql/log.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index 12e2aee94cc..277d8de6701 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -8556,13 +8556,10 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered"); ++num_commits; + set_current_thd(current->thd); if (current->cache_mngr->using_xa && likely(!current->error) && DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1)) - { - mysql_mutex_lock(&current->thd->LOCK_thd_data); run_commit_ordered(current->thd, current->all); - mysql_mutex_unlock(&current->thd->LOCK_thd_data); - } current->thd->wakeup_subsequent_commits(current->error); /* @@ -8579,6 +8576,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) } current= next; } + set_current_thd(leader->thd); DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered"); mysql_mutex_unlock(&LOCK_commit_ordered); DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered"); From c68620df48cd1b0e7f5f1cb5ff8b905a5d9c80a5 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sat, 4 Nov 2023 20:40:31 +0100 Subject: [PATCH 05/24] Fix random test failures in testcase perfschema.mdl_func The test case can get extra rows in its output from table performance_schema.table_handles, left there by an earlier test case (for example main.long_unique_delayed). So force a server restart at the beginning of the test. 
Signed-off-by: Kristian Nielsen --- mysql-test/suite/perfschema/r/mdl_func.result | 1 + mysql-test/suite/perfschema/t/mdl_func.test | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/mysql-test/suite/perfschema/r/mdl_func.result b/mysql-test/suite/perfschema/r/mdl_func.result index 4887b15efa5..f3fc0d10a96 100644 --- a/mysql-test/suite/perfschema/r/mdl_func.result +++ b/mysql-test/suite/perfschema/r/mdl_func.result @@ -1,3 +1,4 @@ +# restart UPDATE performance_schema.setup_instruments SET enabled = 'NO', timed = 'YES'; UPDATE performance_schema.setup_instruments SET enabled = 'YES' WHERE name in ('wait/io/table/sql/handler', diff --git a/mysql-test/suite/perfschema/t/mdl_func.test b/mysql-test/suite/perfschema/t/mdl_func.test index c7f5585a5c2..209c2f112af 100644 --- a/mysql-test/suite/perfschema/t/mdl_func.test +++ b/mysql-test/suite/perfschema/t/mdl_func.test @@ -1,5 +1,11 @@ --source include/not_embedded.inc --source include/have_perfschema.inc +# This test needs a fresh restart. The table performance_schema.table_handles +# can otherwise contain extra rows left from previous testcases. +# For example the test case main.long_unique_delayed, which uses +# INSERT DELAYED, will leave extra rows in this table if run just before this +# test, causing .result diff failure. +--source include/restart_mysqld.inc UPDATE performance_schema.setup_instruments SET enabled = 'NO', timed = 'YES'; From 04477bd9364e7a753918c730b89e7012b1926e6a Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 8 Nov 2023 12:00:29 +0100 Subject: [PATCH 06/24] MDEV-28836 fix crashing PFS unit tests on Windows. 
In stub_pfs_global, use aligned_free() for pfs_free(), to match aligned_malloc() in pfs_malloc() --- storage/perfschema/unittest/stub_pfs_global.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/perfschema/unittest/stub_pfs_global.h b/storage/perfschema/unittest/stub_pfs_global.h index 4b792f9bfef..e6876dccfe6 100644 --- a/storage/perfschema/unittest/stub_pfs_global.h +++ b/storage/perfschema/unittest/stub_pfs_global.h @@ -58,7 +58,7 @@ void *pfs_malloc(PFS_builtin_memory_class *klass, size_t size, myf) void pfs_free(PFS_builtin_memory_class *, size_t, void *ptr) { if (ptr != NULL) - free(ptr); + aligned_free(ptr); } void *pfs_malloc_array(PFS_builtin_memory_class *klass, size_t n, size_t size, myf flags) From e0c65784aa78ea91f7e0ef400bbda9562d07c0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 9 Nov 2023 11:06:17 +0200 Subject: [PATCH 07/24] MDEV-32737 innodb.log_file_name fails on Assertion `after_apply || !(blocks).end in recv_sys_t::clear recv_group_scan_log_recs(): Set the debug flag recv_sys.after_apply after actually completing the log scan. In the test, suppress some errors that may be reported when the crash recovery of RENAME TABLE t1 TO t2 is preceded by copying t2.ibd to t1.ibd. 
--- mysql-test/suite/innodb/t/log_file_name.test | 2 ++ storage/innobase/log/log0recv.cc | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/innodb/t/log_file_name.test b/mysql-test/suite/innodb/t/log_file_name.test index c153c38e54b..11eca0a7a12 100644 --- a/mysql-test/suite/innodb/t/log_file_name.test +++ b/mysql-test/suite/innodb/t/log_file_name.test @@ -170,6 +170,8 @@ call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("Plugin 'InnoDB' \(init function returned error\|registration as a STORAGE ENGINE failed\)"); call mtr.add_suppression("InnoDB: Table test/u[123] in the InnoDB data dictionary has tablespace id [1-9][0-9]*, but tablespace with that id or name does not exist\\. Have you deleted or moved \\.ibd files\\?"); call mtr.add_suppression("InnoDB: Cannot replay rename of tablespace.*"); +call mtr.add_suppression("InnoDB: Attempted to open a previously opened tablespace"); +call mtr.add_suppression("InnoDB: Recovery cannot access file"); FLUSH TABLES; --enable_query_log diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index a213b4842ce..59c4682552f 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3203,7 +3203,6 @@ recv_group_scan_log_recs( log_sys.log.scanned_lsn = end_lsn = *contiguous_lsn = ut_uint64_align_down(*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE); - ut_d(recv_sys.after_apply = last_phase); do { if (last_phase && store == STORE_NO) { @@ -3226,6 +3225,7 @@ recv_group_scan_log_recs( DBUG_RETURN(false); } + ut_d(recv_sys.after_apply = last_phase); DBUG_PRINT("ib_log", ("%s " LSN_PF " completed", last_phase ? 
"rescan" : "scan", log_sys.log.scanned_lsn)); From 9d8f659f80ec096ab61c393f807b4dae185d7fd1 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Fri, 10 Nov 2023 12:40:21 +0100 Subject: [PATCH 08/24] galera: post-fix after migrating changes from 10.4 --- sql/rpl_gtid.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 26a86c60608..3f519f67eb6 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -703,6 +703,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, if (WSREP_ON_ && wsrep_thd_is_local(thd)) { thd->wsrep_ignore_table= false; + table->file->row_logging= 1; // replication requires binary logging wsrep_start_trx_if_not_started(thd); } else From c3fdfbdbd82c43f867ed82232e3b3077c485c5e1 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Fri, 10 Nov 2023 13:54:25 +0100 Subject: [PATCH 09/24] MDEV-31413: post-fix for 10.5+ (galera_restart_replica test failures) --- mysql-test/suite/galera/t/galera_restart_replica.test | 2 +- sql/rpl_gtid.cc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/galera/t/galera_restart_replica.test b/mysql-test/suite/galera/t/galera_restart_replica.test index 2cc3a1dcff2..37cfd9bc0f9 100644 --- a/mysql-test/suite/galera/t/galera_restart_replica.test +++ b/mysql-test/suite/galera/t/galera_restart_replica.test @@ -3,9 +3,9 @@ # # The galera/galera_2node_slave.cnf describes the setup of the nodes # ---source include/big_test.inc --source include/force_restart.inc --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_sequence.inc # As node #3 is not a Galera node, and galera_cluster.inc does not open connetion to it diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 26a86c60608..3f519f67eb6 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -703,6 +703,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, if (WSREP_ON_ && wsrep_thd_is_local(thd)) { 
thd->wsrep_ignore_table= false; + table->file->row_logging= 1; // replication requires binary logging wsrep_start_trx_if_not_started(thd); } else From 9da247b2e727ce87c1c10d81ec54601d473ca896 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Sat, 11 Nov 2023 15:37:29 +0100 Subject: [PATCH 10/24] galera: cleanup of the lists of disabled tests --- mysql-test/suite/galera/disabled.def | 14 ++++++++------ mysql-test/suite/galera_3nodes/disabled.def | 7 +++---- mysql-test/suite/galera_3nodes_sr/disabled.def | 2 +- mysql-test/suite/galera_sr/disabled.def | 3 +-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index 104a631f8b7..c5412874929 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -14,9 +14,11 @@ galera_as_slave_ctas : MDEV-28378 timeout galera_pc_recovery : MDEV-25199 cluster fails to start up galera_sst_encrypted : MDEV-29876 Galera test failure on galera_sst_encrypted galera_var_node_address : MDEV-20485 Galera test failure -MDEV-26575 : MDEV-29878 Galera test failure on MDEV-26575 -galera_bf_abort_shutdown : MDEV-29918 Assertion failure on galera_bf_abort_shutdown -galera_var_ignore_apply_errors : 28: "Server did not transition to READY state" -versioning_trx_id : MDEV-18590: galera.versioning_trx_id: Test failure: mysqltest: Result content mismatch -MDEV-27713 : test is using get_lock(), which is now rejected in cluster -galera_bf_abort_group_commit : MDEV-30855 PR to remove the test exists +galera_sequences : MDEV-32561 WSREP FSM failure: no such a transition REPLICATING -> COMMITTED +galera_shutdown_nonprim : MDEV-32635 galera_shutdown_nonprim: mysql_shutdown failed +versioning_trx_id : MDEV-18590 : galera.versioning_trx_id: Test failure: mysqltest: Result content mismatch +galera_concurrent_ctas : MDEV-32779 galera_concurrent_ctas: assertion in the galera::ReplicatorSMM::finish_cert() +galera_as_slave_replay : 
MDEV-32780 galera_as_slave_replay: assertion in the wsrep::transaction::before_rollback() +galera_slave_replay : MDEV-32780 galera_as_slave_replay: assertion in the wsrep::transaction::before_rollback() +galera_bf_lock_wait : MDEV-32781 galera_bf_lock_wait test failed +galera_sst_mysqldump_with_key : MDEV-32782 galera_sst_mysqldump_with_key test failed diff --git a/mysql-test/suite/galera_3nodes/disabled.def b/mysql-test/suite/galera_3nodes/disabled.def index dc08e0981bd..3986bc69dcf 100644 --- a/mysql-test/suite/galera_3nodes/disabled.def +++ b/mysql-test/suite/galera_3nodes/disabled.def @@ -10,8 +10,7 @@ # ############################################################################## - -galera_2_cluster : MDEV-29877 Galera test failure on galera_2_cluster -galera_gtid_2_cluster : MDEV-29877 Galera test failure on galera_2_cluster +galera_2_cluster : MDEV-32631 galera_2_cluster: before_rollback(): Assertion `0' failed +galera_gtid_2_cluster : MDEV-32633 galera_gtid_2_cluster: Assertion `thd->wsrep_next_trx_id() != (0x7fffffffffffffffLL * 2ULL + 1)' galera_vote_rejoin_mysqldump : MDEV-24481: galera_3nodes.galera_vote_rejoin_mysqldump MTR failed: mysql_shutdown failed -galera_ssl_reload : MDEV-30172 At line 50: mysql_shutdown failed +galera_ssl_reload : MDEV-32778 galera_ssl_reload failed with warning message diff --git a/mysql-test/suite/galera_3nodes_sr/disabled.def b/mysql-test/suite/galera_3nodes_sr/disabled.def index df2277fb8ad..4472d960d9f 100644 --- a/mysql-test/suite/galera_3nodes_sr/disabled.def +++ b/mysql-test/suite/galera_3nodes_sr/disabled.def @@ -10,4 +10,4 @@ # ############################################################################## -galera_sr_kill_slave_after_apply_rollback2 : MDEV-29892 Galera test failure on galera_sr_kill_slave_after_apply_rollback2 \ No newline at end of file +galera_sr_kill_slave_after_apply_rollback2 : MDEV-29892 Galera test failure on galera_sr_kill_slave_after_apply_rollback2 diff --git 
a/mysql-test/suite/galera_sr/disabled.def b/mysql-test/suite/galera_sr/disabled.def index 1132a0097c4..fa62168d474 100644 --- a/mysql-test/suite/galera_sr/disabled.def +++ b/mysql-test/suite/galera_sr/disabled.def @@ -10,6 +10,5 @@ # ############################################################################## -GCF-1060 : MDEV-26528 wrong usage of mutex LOCK_thd_kill and LOCK_thd_kill +GCF-1060 : MDEV-32160 GCF-1060 test failure due to wsrep MDL conflict galera_sr_cc_master : MDEV-29882 Galera test failure on galera_sr_cc_master -mysql-wsrep-features#138 : At line 25: query 'DROP TABLE t1' failed: 2013: Lost connection to MySQL server during query From dec4d0badc2ef79eb3746f8bfa2a29d351e8b6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 13 Nov 2023 14:35:33 +0200 Subject: [PATCH 11/24] MDEV-32788: Debug build failure with SUX_LOCK_GENERIC Fixes up commit 2027c482de0a75ff09fd08704cdbe36dcf5db91e --- storage/innobase/trx/trx0purge.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 592b8a5371c..0f55892a105 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -852,7 +852,9 @@ void purge_sys_t::rseg_get_next_history_log() { fil_addr_t prev_log_addr; +#ifndef SUX_LOCK_GENERIC ut_ad(rseg->latch.is_write_locked()); +#endif ut_a(rseg->last_page_no != FIL_NULL); tail.trx_no= rseg->last_trx_no() + 1; @@ -968,7 +970,9 @@ inline trx_purge_rec_t purge_sys_t::get_next_rec(roll_ptr_t roll_ptr) { ut_ad(next_stored); ut_ad(tail.trx_no < low_limit_no()); +#ifndef SUX_LOCK_GENERIC ut_ad(rseg->latch.is_write_locked()); +#endif if (!offset) { From 8b84fb13837d0964211d52916e2eea82bb3d2e16 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 13 Nov 2023 13:21:35 -0500 Subject: [PATCH 12/24] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 
0f8a4608896..e7b29db2217 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=23 +MYSQL_VERSION_PATCH=24 SERVER_MATURITY=stable From 91835ef3788a76d7396b00335458610b315517b7 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 13 Nov 2023 13:22:44 -0500 Subject: [PATCH 13/24] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index c8596588b6b..9402abb61e9 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=6 -MYSQL_VERSION_PATCH=16 +MYSQL_VERSION_PATCH=17 SERVER_MATURITY=stable From c638051d808a7b29b10e231c9eae97d5909d17f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 14 Nov 2023 14:35:51 +0200 Subject: [PATCH 14/24] MDEV-32798 innodb_fast_shutdown=0 hang after incomplete startup innodb_preshutdown(): Only wait for active transactions to be terminated if InnoDB was started and innodb_force_recovery=3 or larger does not prevent a rollback. This fixes the following: ./mtr --parallel=auto --mysqld=--innodb-fast-shutdown=0 \ innodb.log_file_size innodb.innodb_force_recovery \ innodb.read_only_recovery innodb.read_only_recover_committed \ mariabackup.apply-log-only-incr --- storage/innobase/srv/srv0start.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 9b8ce4b2fa8..6f40df82f8f 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1995,7 +1995,7 @@ void innodb_preshutdown() better prevent any further changes from being buffered. 
*/ innodb_change_buffering= 0; - if (trx_sys.is_initialised()) + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO && srv_was_started) while (trx_sys.any_active_transactions()) os_thread_sleep(1000); } From c11610f5900cdb35e1a5410ed0b4ff9d684c9729 Mon Sep 17 00:00:00 2001 From: Tuukka Pasanen Date: Mon, 6 Nov 2023 09:06:00 +0200 Subject: [PATCH 15/24] MDEV-32689: Remove Ubuntu Bionic Remove Ubuntu Bionic from debian/autobake-deb.sh, as it is no longer used to build official MariaDB images --- debian/autobake-deb.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index 0ceabf8b1de..4213593a4b2 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -122,10 +122,6 @@ in # there is intentionally no customizations whatsoever. ;; # Ubuntu - "bionic") - remove_rocksdb_tools - [ "$architecture" != amd64 ] && disable_pmem - ;& "focal") replace_uring_with_aio ;& From 15bb8acf8f84c10a8c977d70a412ed1e756dd54a Mon Sep 17 00:00:00 2001 From: Tuukka Pasanen Date: Wed, 15 Nov 2023 10:49:39 +0200 Subject: [PATCH 16/24] MDEV-32689: Remove Ubuntu Bionic from 10.5 Remove Ubuntu Bionic from debian/autobake-deb.sh, as it is no longer used to build official MariaDB images REMINDER TO MERGER: This commit should not be merged up to 10.6 or forward --- debian/autobake-deb.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index aa5a6651cea..994167a826b 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -76,9 +76,6 @@ case "${LSBNAME}" in # need to match here to avoid the default Error however ;; # UBUNTU - bionic) - remove_rocksdb_tools - ;& focal) ;& impish|jammy|kinetic) From a0f02f743848fb6ef6d2a51960f77e1e38da1682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 15 Nov 2023 12:23:35 +0200 Subject: [PATCH 17/24] MDEV-32757 innodb_undo_log_truncate=ON is not crash safe trx_purge_truncate_history(): Do not
prematurely mark dirty pages as clean. This will be done in mtr_t::commit_shrink() as part of Shrink::operator()(mtr_memo_slot_t*). Also, register each dirty page only once in the mini-transaction. fsp_page_create(): Adjust and simplify the page creation during undo tablespace truncation. We can directly reuse pages that are already in buf_pool.page_hash. This fixes a regression that was caused by commit f5794e1dc6e3d27405daeae850b8e69fd631b62d (MDEV-26445). Tested by: Matthias Leich Reviewed by: Thirunarayanan Balathandayuthapani --- storage/innobase/fsp/fsp0fsp.cc | 50 ++++++++++++++++++++++--------- storage/innobase/trx/trx0purge.cc | 15 ++++------ 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 134c0bbb8db..c5d04a19662 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1054,7 +1054,7 @@ static buf_block_t* fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) { - buf_block_t *block, *free_block; + buf_block_t *block; if (UNIV_UNLIKELY(space->is_being_truncated)) { @@ -1063,26 +1063,48 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) mysql_mutex_lock(&buf_pool.mutex); block= reinterpret_cast (buf_pool.page_hash_get_low(page_id, fold)); - if (block && block->page.oldest_modification() <= 1) - block= nullptr; + if (!block) + { + mysql_mutex_unlock(&buf_pool.mutex); + goto create; + } + + buf_block_buf_fix_inc(block, __FILE__, __LINE__); mysql_mutex_unlock(&buf_pool.mutex); - if (block) + ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(!block->page.ibuf_exist); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!block->index); +#endif + + if (mtr->have_x_latch(*block)) { + buf_block_buf_fix_dec(block); ut_ad(block->page.buf_fix_count() >= 1); ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); - ut_ad(mtr->have_x_latch(*block)); - free_block= block; - goto got_free_block; + } + else + { + 
rw_lock_x_lock(&block->lock); + if (UNIV_UNLIKELY(block->page.id() != page_id)) + { + buf_block_buf_fix_dec(block); + rw_lock_x_unlock(&block->lock); + goto create; + } + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); } } - - free_block= buf_LRU_get_free_block(false); -got_free_block: - block= buf_page_create(space, static_cast(offset), - space->zip_size(), mtr, free_block); - if (UNIV_UNLIKELY(block != free_block)) - buf_pool.free_block(free_block); + else + { + create: + buf_block_t *free_block= buf_LRU_get_free_block(false); + block= buf_page_create(space, static_cast(offset), + space->zip_size(), mtr, free_block); + if (UNIV_UNLIKELY(block != free_block)) + buf_pool.free_block(free_block); + } fsp_init_file_page(space, block, mtr); return block; diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index a297ca4921e..4a98dee573e 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -659,13 +659,12 @@ void trx_purge_truncate_history() buf_page_t *prev= UT_LIST_GET_PREV(list, bpage); - if (bpage->id().space() == space.id && - bpage->oldest_modification() != 1) + if (bpage->oldest_modification() > 2 && + bpage->id().space() == space.id) { ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); auto block= reinterpret_cast(bpage); - block->fix(); - ut_ad(rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__)); + buf_block_buf_fix_inc(block, __FILE__, __LINE__); buf_pool.flush_hp.set(prev); mysql_mutex_unlock(&buf_pool.flush_list_mutex); @@ -676,15 +675,13 @@ void trx_purge_truncate_history() mysql_mutex_lock(&buf_pool.flush_list_mutex); ut_ad(bpage->io_fix() == BUF_IO_NONE); - if (bpage->oldest_modification() > 1) - { - bpage->clear_oldest_modification(false); + if (bpage->oldest_modification() > 2 && + !mtr.have_x_latch(*reinterpret_cast(bpage))) mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); - } else { + buf_block_buf_fix_dec(block); rw_lock_x_unlock(&block->lock); - block->unfix(); } if (prev != 
buf_pool.flush_hp.get()) From ea6ca0139745068d2c6fe06ecc8d71cc0d371d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 15 Nov 2023 14:11:38 +0200 Subject: [PATCH 18/24] MDEV-32757: rollback crash on corruption trx_undo_free_page(): Detect a case of corrupted TRX_UNDO_PAGE_LIST. trx_undo_truncate_end(): Stop attempts to truncate a corrupted log. trx_t::commit_empty(): Add an error message of a corrupted log. Reviewed by: Thirunarayanan Balathandayuthapani --- storage/innobase/trx/trx0trx.cc | 8 +++++++- storage/innobase/trx/trx0undo.cc | 14 +++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 1ffc8c76189..1bd24870a0c 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -976,7 +976,13 @@ void trx_t::commit_empty(mtr_t *mtr) trx_undo_t *&undo= rsegs.m_redo.undo; ut_ad(undo->state == TRX_UNDO_ACTIVE || undo->state == TRX_UNDO_PREPARED); - ut_ad(undo->size == 1); + + if (UNIV_UNLIKELY(undo->size != 1)) + { + sql_print_error("InnoDB: Undo log for transaction " TRX_ID_FMT + " is corrupted (" UINT32PF "!=1)", id, undo->size); + ut_ad("corrupted undo log" == 0); + } if (buf_block_t *u= buf_page_get(page_id_t(rseg->space->id, undo->hdr_page_no), 0, diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 203edd9f537..63da2982fec 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -740,6 +740,14 @@ trx_undo_free_page( return FIL_NULL; } + const fil_addr_t last_addr = flst_get_last( + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + + header_block->page.frame); + if (UNIV_UNLIKELY(last_addr.page == page_no)) { + *err = DB_CORRUPTION; + return FIL_NULL; + } + *err = fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + header_block->page.frame, rseg->space, page_no, mtr); @@ -748,9 +756,6 @@ trx_undo_free_page( } buf_page_free(rseg->space, page_no, mtr); - 
const fil_addr_t last_addr = flst_get_last( - TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST - + header_block->page.frame); rseg->curr_size--; if (!in_history) { @@ -794,6 +799,9 @@ static dberr_t trx_undo_truncate_end(trx_undo_t &undo, undo_no_t limit, { ut_ad(is_temp == !undo.rseg->is_persistent()); + if (UNIV_UNLIKELY(undo.last_page_no == FIL_NULL)) + return DB_CORRUPTION; + for (mtr_t mtr;;) { mtr.start(); From 8b509a5d64106dcd737a71921b8fc74ae3dd4130 Mon Sep 17 00:00:00 2001 From: Rex Date: Mon, 25 Sep 2023 12:56:30 +1100 Subject: [PATCH 19/24] Merge 10.4 into 10.5 --- mysql-test/main/cte_nonrecursive.result | 260 ++++++++++++++++++++++++ mysql-test/main/cte_nonrecursive.test | 158 ++++++++++++++ sql/sql_cte.cc | 2 +- sql/sql_derived.cc | 7 + sql/sql_lex.cc | 67 ++++++ sql/sql_lex.h | 8 + sql/sql_select.cc | 20 ++ 7 files changed, 521 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/cte_nonrecursive.result b/mysql-test/main/cte_nonrecursive.result index f7871d4f929..e05120fa60d 100644 --- a/mysql-test/main/cte_nonrecursive.result +++ b/mysql-test/main/cte_nonrecursive.result @@ -2339,4 +2339,264 @@ set sql_mode="oracle"; with data as (select 1 as id) select id into @myid from data; set sql_mode= @save_sql_mode; +# +# MDEV-31995 Bogus error executing PS for query using CTE with renaming of columns +# +create table t1 (a int, b int); +insert into t1 values (1,1),(1,2),(1,3),(2,1),(2,2); +create table t2 (a int, b int); +insert into t2 values (3,1),(3,2),(3,3),(4,1),(4,2); +with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 group by col1) +select * from cte; +c1 c2 +1 6 +2 3 +prepare st from "with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 group by col1) +select * from cte"; +execute st; +c1 c2 +1 6 +2 3 +execute st; +c1 c2 +1 6 +2 3 +drop prepare st; +create procedure sp() with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 group by col1) +select * from cte; +call sp(); +c1 c2 +1 6 +2 3 +call sp(); +c1 c2 +1 6 +2 3 +drop 
procedure sp; +with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 order by col1) +select * from cte; +c1 c2 +1 9 +prepare st from "with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 order by col1) +select * from cte"; +execute st; +c1 c2 +1 9 +execute st; +c1 c2 +1 9 +drop prepare st; +create procedure sp() with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 order by col1) +select * from cte; +call sp(); +c1 c2 +1 9 +call sp(); +c1 c2 +1 9 +drop procedure sp; +with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 where a > 1 group by col1 +union select a as col3, sum(b) as col4 from t2 where b > 2 group by col3), +cte2 (c3, c4) as +(select a as col5, sum(b) as col6 from t1 where a <= 1 group by col5 +union select a as col7, sum(b) as col8 from t2 where b <= 2 group by col7) +select * from cte where c1=1 union select * from cte2 where c3=3; +c1 c2 +3 3 +prepare st from "with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 where a > 1 group by col1 +union select a as col3, sum(b) as col4 from t2 where b > 2 group by col3), +cte2 (c3, c4) as +(select a as col5, sum(b) as col6 from t1 where a <= 1 group by col5 +union select a as col7, sum(b) as col8 from t2 where b <= 2 group by col7) +select * from cte where c1=1 union select * from cte2 where c3=3"; +execute st; +c1 c2 +3 3 +execute st; +c1 c2 +3 3 +drop prepare st; +create procedure sp() with cte (c1,c2) as +(select a as col1, sum(b) as col2 from t1 where a > 1 group by col1 +union select a as col3, sum(b) as col4 from t2 where b > 2 group by col3), +cte2 (c3, c4) as +(select a as col5, sum(b) as col6 from t1 where a <= 1 group by col5 +union select a as col7, sum(b) as col8 from t2 where b <= 2 group by col7) +select * from cte where c1=1 union select * from cte2 where c3=3; +call sp(); +c1 c2 +3 3 +call sp(); +c1 c2 +3 3 +drop procedure sp; +with cte (c1,c2) as (select * from t1) +select cte.c1+1 as col1 , cte.c2 as col2 from cte where cte.c1 > 1 +union 
+select cte.c1 as col3, cte.c2+1 as col4 from cte where cte.c1 < 0; +col1 col2 +3 1 +3 2 +prepare st from "with cte (c1,c2) as (select * from t1) +select cte.c1+1 as col1 , cte.c2 as col2 from cte where cte.c1 > 1 +union +select cte.c1 as col3, cte.c2+1 as col4 from cte where cte.c1 < 0"; +execute st; +col1 col2 +3 1 +3 2 +execute st; +col1 col2 +3 1 +3 2 +save this to the end to test errors >drop prepare st; +create procedure sp() with cte (c1,c2) as (select * from t1) +select cte.c1+1 as col1 , cte.c2 as col2 from cte where cte.c1 > 1 +union +select cte.c1 as col3, cte.c2+1 as col4 from cte where cte.c1 < 0; +call sp(); +col1 col2 +3 1 +3 2 +call sp(); +col1 col2 +3 1 +3 2 +drop procedure sp; +insert into t1 select * from t2; +with cte (c1, c2) +as (select a, sum(b) from t1 where b > 1 group by a having sum(b) < 5) +select * from cte where c1 < 4 and c2 > 1; +c1 c2 +2 2 +# Check pushdown conditions in JSON output +explain format=json with cte (c1, c2) +as (select a, sum(b) from t1 where b > 1 group by a having sum(b) < 5) +select * from cte where c1 < 4 and c2 > 1; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "", + "access_type": "ALL", + "rows": 10, + "filtered": 100, + "attached_condition": "cte.c1 < 4 and cte.c2 > 1", + "materialized": { + "query_block": { + "select_id": 2, + "having_condition": "sum(t1.b) < 5 and c2 > 1", + "filesort": { + "sort_key": "t1.a", + "temporary_table": { + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 10, + "filtered": 100, + "attached_condition": "t1.b > 1 and t1.a < 4" + } + } + } + } + } + } + } +} +alter table t1 add column c int; +execute st; +ERROR HY000: WITH column list and SELECT field list have different column counts +drop prepare st; +drop table t1,t2; +Test out recursive CTEs +create table distances (src char(1), dest char(1), distance int); +create table city_population (city char(1), population int); +INSERT INTO `distances` VALUES 
('A','A',0),('B','A',593),('C','A',800), +('D','A',221),('E','A',707),('F','A',869),('G','A',225),('H','A',519), +('A','B',919),('B','B',0),('C','B',440),('D','B',79),('E','B',79), +('F','B',154),('G','B',537),('H','B',220),('A','C',491),('B','C',794), +('C','C',0),('D','C',100),('E','C',350),('F','C',748),('G','C',712), +('H','C',315),('A','D',440),('B','D',256),('C','D',958),('D','D',0), +('E','D',255),('F','D',161),('G','D',63),('H','D',831),('A','E',968), +('B','E',345),('C','E',823),('D','E',81),('E','E',0),('F','E',436), +('G','E',373),('H','E',558),('A','F',670),('B','F',677),('C','F',375), +('D','F',843),('E','F',90),('F','F',0),('G','F',328),('H','F',881), +('A','G',422),('B','G',467),('C','G',67),('D','G',936),('E','G',480), +('F','G',592),('G','G',0),('H','G',819),('A','H',537),('B','H',229), +('C','H',534),('D','H',984),('E','H',319),('F','H',643),('G','H',257), +('H','H',0); +insert into city_population values ('A', 5000), ('B', 6000), ('C', 100000), +('D', 80000), ('E', 7000), ('F', 1000), ('G', 100), ('H', -80000); +#find the biggest city within 300 kellikams of 'E' +with recursive travel (src, path, dest, distance, population) as ( +select city, cast('' as varchar(10)), city, +0, population +from city_population where city='E' + union all +select src.src, concat(src.path, dst.dest), dst.dest, +src.distance + dst.distance, dstc.population +from travel src +join distances dst on src.dest != dst.dest +join city_population dstc on dst.dest = dstc.city +where dst.src = src.dest and src.distance + dst.distance < 300 +and length(path) < 10 +) +select * from travel where dest != 'E' order by population desc, distance +limit 1; +src path dest distance population +E FD D 251 80000 +prepare st from "with recursive travel (src, path, dest, distance, population) as ( +select city, cast('' as varchar(10)), city, +0, population +from city_population where city='E' + union all +select src.src, concat(src.path, dst.dest), dst.dest, +src.distance + dst.distance, 
dstc.population +from travel src +join distances dst on src.dest != dst.dest +join city_population dstc on dst.dest = dstc.city +where dst.src = src.dest and src.distance + dst.distance < 300 +and length(path) < 10 +) +select * from travel where dest != 'E' order by population desc, distance +limit 1"; +execute st; +src path dest distance population +E FD D 251 80000 +execute st; +src path dest distance population +E FD D 251 80000 +drop prepare st; +create procedure sp() with recursive travel (src, path, dest, distance, population) as ( +select city, cast('' as varchar(10)), city, +0, population +from city_population where city='E' + union all +select src.src, concat(src.path, dst.dest), dst.dest, +src.distance + dst.distance, dstc.population +from travel src +join distances dst on src.dest != dst.dest +join city_population dstc on dst.dest = dstc.city +where dst.src = src.dest and src.distance + dst.distance < 300 +and length(path) < 10 +) +select * from travel where dest != 'E' order by population desc, distance +limit 1; +call sp(); +src path dest distance population +E FD D 251 80000 +call sp(); +src path dest distance population +E FD D 251 80000 +drop procedure sp; +drop table distances, city_population; # End of 10.4 tests diff --git a/mysql-test/main/cte_nonrecursive.test b/mysql-test/main/cte_nonrecursive.test index c420a5e0483..1d29f3d4caa 100644 --- a/mysql-test/main/cte_nonrecursive.test +++ b/mysql-test/main/cte_nonrecursive.test @@ -1796,4 +1796,162 @@ with data as (select 1 as id) select id into @myid from data; set sql_mode= @save_sql_mode; + + +--echo # +--echo # MDEV-31995 Bogus error executing PS for query using CTE with renaming of columns +--echo # + +create table t1 (a int, b int); +insert into t1 values (1,1),(1,2),(1,3),(2,1),(2,2); +create table t2 (a int, b int); +insert into t2 values (3,1),(3,2),(3,3),(4,1),(4,2); + +let $q= +with cte (c1,c2) as + (select a as col1, sum(b) as col2 from t1 group by col1) +select * from cte; + +eval $q; + 
+eval prepare st from "$q"; +execute st; +execute st; +drop prepare st; + +eval create procedure sp() $q; +call sp(); +call sp(); +drop procedure sp; + +let $q= +with cte (c1,c2) as + (select a as col1, sum(b) as col2 from t1 order by col1) +select * from cte; + +eval $q; + +eval prepare st from "$q"; +execute st; +execute st; +drop prepare st; + +eval create procedure sp() $q; +call sp(); +call sp(); +drop procedure sp; + +let $q= +with cte (c1,c2) as + (select a as col1, sum(b) as col2 from t1 where a > 1 group by col1 + union select a as col3, sum(b) as col4 from t2 where b > 2 group by col3), +cte2 (c3, c4) as + (select a as col5, sum(b) as col6 from t1 where a <= 1 group by col5 + union select a as col7, sum(b) as col8 from t2 where b <= 2 group by col7) +select * from cte where c1=1 union select * from cte2 where c3=3; + +eval $q; + +eval prepare st from "$q"; +execute st; +execute st; +drop prepare st; + +eval create procedure sp() $q; +call sp(); +call sp(); +drop procedure sp; + +let $q= +with cte (c1,c2) as (select * from t1) +select cte.c1+1 as col1 , cte.c2 as col2 from cte where cte.c1 > 1 +union +select cte.c1 as col3, cte.c2+1 as col4 from cte where cte.c1 < 0; + +eval $q; + +eval prepare st from "$q"; +execute st; +execute st; +--echo save this to the end to test errors >drop prepare st; + +eval create procedure sp() $q; +call sp(); +call sp(); +drop procedure sp; + +insert into t1 select * from t2; + +let $q= +with cte (c1, c2) + as (select a, sum(b) from t1 where b > 1 group by a having sum(b) < 5) +select * from cte where c1 < 4 and c2 > 1; + +eval $q; + +--echo # Check pushdown conditions in JSON output +--source include/analyze-format.inc +eval explain format=json $q; + +alter table t1 add column c int; + +--error ER_WITH_COL_WRONG_LIST +execute st; + +drop prepare st; +drop table t1,t2; + +--echo Test out recursive CTEs + +create table distances (src char(1), dest char(1), distance int); +create table city_population (city char(1), population 
int); +INSERT INTO `distances` VALUES ('A','A',0),('B','A',593),('C','A',800), +('D','A',221),('E','A',707),('F','A',869),('G','A',225),('H','A',519), +('A','B',919),('B','B',0),('C','B',440),('D','B',79),('E','B',79), +('F','B',154),('G','B',537),('H','B',220),('A','C',491),('B','C',794), +('C','C',0),('D','C',100),('E','C',350),('F','C',748),('G','C',712), +('H','C',315),('A','D',440),('B','D',256),('C','D',958),('D','D',0), +('E','D',255),('F','D',161),('G','D',63),('H','D',831),('A','E',968), +('B','E',345),('C','E',823),('D','E',81),('E','E',0),('F','E',436), +('G','E',373),('H','E',558),('A','F',670),('B','F',677),('C','F',375), +('D','F',843),('E','F',90),('F','F',0),('G','F',328),('H','F',881), +('A','G',422),('B','G',467),('C','G',67),('D','G',936),('E','G',480), +('F','G',592),('G','G',0),('H','G',819),('A','H',537),('B','H',229), +('C','H',534),('D','H',984),('E','H',319),('F','H',643),('G','H',257), +('H','H',0); +insert into city_population values ('A', 5000), ('B', 6000), ('C', 100000), +('D', 80000), ('E', 7000), ('F', 1000), ('G', 100), ('H', -80000); + +--echo #find the biggest city within 300 kellikams of 'E' +let $q= +with recursive travel (src, path, dest, distance, population) as ( + select city, cast('' as varchar(10)), city, + 0, population + from city_population where city='E' + union all + select src.src, concat(src.path, dst.dest), dst.dest, + src.distance + dst.distance, dstc.population + from travel src + join distances dst on src.dest != dst.dest + join city_population dstc on dst.dest = dstc.city + where dst.src = src.dest and src.distance + dst.distance < 300 + and length(path) < 10 + ) +select * from travel where dest != 'E' order by population desc, distance +limit 1; + +eval $q; + +eval prepare st from "$q"; +execute st; +execute st; +drop prepare st; + +eval create procedure sp() $q; +call sp(); +call sp(); +drop procedure sp; + +drop table distances, city_population; + --echo # End of 10.4 tests diff --git a/sql/sql_cte.cc 
b/sql/sql_cte.cc index e22aa1ebd6f..ab9a3db5099 100644 --- a/sql/sql_cte.cc +++ b/sql/sql_cte.cc @@ -1199,7 +1199,7 @@ With_element::process_columns_of_derived_unit(THD *thd, /* Rename the columns of the first select in the unit */ while ((item= it++, name= nm++)) { - item->set_name(thd, *name); + lex_string_set(&item->name, name->str); item->common_flags&= ~IS_AUTO_GENERATED_NAME; } diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index e5a5b27840b..a5c1590ecfb 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -1334,6 +1334,13 @@ bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived) (derived->alias.str ? derived->alias.str : ""), derived->get_unit())); st_select_lex_unit *unit= derived->get_unit(); + st_select_lex *sl= unit->first_select(); + + // reset item names to that saved after wildcard expansion in JOIN::prepare + do + { + sl->restore_item_list_names(); + } while ((sl= sl->next_select())); derived->merged_for_insert= FALSE; unit->unclean(); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 246076d0b4a..739811af9c5 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -2958,6 +2958,7 @@ void st_select_lex::init_query() in_tvc= false; versioned_tables= 0; pushdown_select= 0; + orig_names_of_item_list_elems= 0; } void st_select_lex::init_select() @@ -3005,6 +3006,7 @@ void st_select_lex::init_select() versioned_tables= 0; is_tvc_wrapper= false; nest_flags= 0; + orig_names_of_item_list_elems= 0; } /* @@ -11080,6 +11082,71 @@ exit: } +/** + @brief + Save the original names of items from the item list. 
+ + @retval + true - if an error occurs + false - otherwise +*/ + +bool st_select_lex::save_item_list_names(THD *thd) +{ + if (orig_names_of_item_list_elems) + return false; + + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + + if (unlikely(!(orig_names_of_item_list_elems= new(thd->mem_root) + List))) + return true; + + List_iterator_fast li(item_list); + Item *item; + + while ((item= li++)) + { + if (unlikely(orig_names_of_item_list_elems->push_back( + new Lex_ident_sys(item->name.str, item->name.length)))) + { + if (arena) + thd->restore_active_arena(arena, &backup); + orig_names_of_item_list_elems= 0; + return true; + } + } + + if (arena) + thd->restore_active_arena(arena, &backup); + + return false; +} + + +/** + @brief + Restore the name of each item in the item_list of this st_select_lex + from orig_names_of_item_list_elems. +*/ + +void st_select_lex::restore_item_list_names() +{ + if (!orig_names_of_item_list_elems) + return; + + DBUG_ASSERT(item_list.elements == orig_names_of_item_list_elems->elements); + + List_iterator_fast it(*orig_names_of_item_list_elems); + Lex_ident_sys *new_name; + List_iterator_fast li(item_list); + Item *item; + + while ((item= li++) && (new_name= it++)) + lex_string_set( &item->name, new_name->str); +} + bool LEX::stmt_install_plugin(const DDL_options_st &opt, const Lex_ident_sys_st &name, const LEX_CSTRING &soname) diff --git a/sql/sql_lex.h b/sql/sql_lex.h index abe5e34ca17..6c7ce6e98da 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1410,6 +1410,9 @@ public: bool straight_fl); TABLE_LIST *convert_right_join(); List* get_item_list(); + bool save_item_list_names(THD *thd); + void restore_item_list_names(); + ulong get_table_join_options(); void set_lock_for_tables(thr_lock_type lock_type, bool for_update); /* @@ -1604,6 +1607,11 @@ private: index_clause_map current_index_hint_clause; /* a list of USE/FORCE/IGNORE INDEX */ List *index_hints; + /* + This list is used to restore the names of 
items + from item_list after each execution of the statement. + */ + List *orig_names_of_item_list_elems; public: inline void add_where_field(st_select_lex *sel) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 016aef4e483..62766cdb9cf 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1317,6 +1317,26 @@ JOIN::prepare(TABLE_LIST *tables_init, COND *conds_init, uint og_num, if (setup_wild(thd, tables_list, fields_list, &all_fields, select_lex, false)) DBUG_RETURN(-1); + /* + If the select_lex is immediately contained within a derived table + AND this derived table is a CTE + WITH supplied column names + AND we have the correct number of elements in both lists + (mismatches found in mysql_derived_prepare/rename_columns_of_derived_unit) + THEN NOW is the time to take a copy of these item_names for + later restoration if required. + */ + TABLE_LIST *derived= select_lex->master_unit()->derived; + + if (derived && + derived->with && + derived->with->column_list.elements && + (derived->with->column_list.elements == select_lex->item_list.elements)) + { + if (select_lex->save_item_list_names(thd)) + DBUG_RETURN(-1); + } + if (thd->lex->current_select->first_cond_optimization) { if ( conds && ! thd->lex->current_select->merged_into) From 55a96c055a1c03595f414f4718ff417ae5c5d718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Nov 2023 16:39:02 +0200 Subject: [PATCH 20/24] MDEV-32050 fixup: innodb.instant_alter_crash This test occasionally fails with a failure to purge history. Let us try to purge everything before starting the interesting part, to make that occasional failure go away. 
--- mysql-test/suite/innodb/r/instant_alter_crash.result | 1 + mysql-test/suite/innodb/t/instant_alter_crash.test | 1 + 2 files changed, 2 insertions(+) diff --git a/mysql-test/suite/innodb/r/instant_alter_crash.result b/mysql-test/suite/innodb/r/instant_alter_crash.result index c6f7d3898be..b92f8ee8724 100644 --- a/mysql-test/suite/innodb/r/instant_alter_crash.result +++ b/mysql-test/suite/innodb/r/instant_alter_crash.result @@ -27,6 +27,7 @@ SELECT * FROM t2; id c2 c3 2 1 De finibus bonorum 3 4 accusantium doloremque laudantium +InnoDB 0 transactions not purged BEGIN; DELETE FROM t1; ROLLBACK; diff --git a/mysql-test/suite/innodb/t/instant_alter_crash.test b/mysql-test/suite/innodb/t/instant_alter_crash.test index b687664dbcc..8eb03b39a1b 100644 --- a/mysql-test/suite/innodb/t/instant_alter_crash.test +++ b/mysql-test/suite/innodb/t/instant_alter_crash.test @@ -38,6 +38,7 @@ disconnect ddl; SELECT * FROM t1; SELECT * FROM t2; +--source include/wait_all_purged.inc BEGIN; DELETE FROM t1; ROLLBACK; From 5a1f821b939997be02702a695d6830d650629137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Nov 2023 16:57:42 +0200 Subject: [PATCH 21/24] MDEV-31861 Empty INSERT crashes with innodb_force_recovery=6 or innodb_read_only=ON ha_innobase::extra(): Do not invoke log_buffer_flush_to_disk() if high_level_read_only holds. log_buffer_flush_to_disk(): Remove an assertion that duplicates one at the start of log_write_up_to(). 
--- mysql-test/suite/innodb/r/read_only_recovery.result | 2 ++ mysql-test/suite/innodb/t/read_only_recovery.test | 2 ++ storage/innobase/handler/ha_innodb.cc | 3 ++- storage/innobase/log/log0log.cc | 1 - 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/innodb/r/read_only_recovery.result b/mysql-test/suite/innodb/r/read_only_recovery.result index add0da94cab..2cde58189d5 100644 --- a/mysql-test/suite/innodb/r/read_only_recovery.result +++ b/mysql-test/suite/innodb/r/read_only_recovery.result @@ -37,6 +37,8 @@ SELECT * FROM t; a 3 SET GLOBAL innodb_max_purge_lag_wait=0; +INSERT INTO mysql.innodb_index_stats +SELECT * FROM mysql.innodb_index_stats LIMIT 0; # restart SELECT * FROM t; a diff --git a/mysql-test/suite/innodb/t/read_only_recovery.test b/mysql-test/suite/innodb/t/read_only_recovery.test index 47146213090..d011b3aa5e3 100644 --- a/mysql-test/suite/innodb/t/read_only_recovery.test +++ b/mysql-test/suite/innodb/t/read_only_recovery.test @@ -39,6 +39,8 @@ SELECT * FROM t; SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; SELECT * FROM t; SET GLOBAL innodb_max_purge_lag_wait=0; +INSERT INTO mysql.innodb_index_stats +SELECT * FROM mysql.innodb_index_stats LIMIT 0; --let $restart_parameters= --source include/restart_mysqld.inc SELECT * FROM t; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 188f703f33c..d5cc2311a2b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -15797,7 +15797,8 @@ ha_innobase::extra( break; case HA_EXTRA_END_ALTER_COPY: m_prebuilt->table->skip_alter_undo = 0; - if (!m_prebuilt->table->is_temporary()) { + if (!m_prebuilt->table->is_temporary() + && !high_level_read_only) { log_buffer_flush_to_disk(); } break; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index e9ef36b4b84..3f8703beb3e 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -871,7 +871,6 @@ 
repeat: @param sync whether to wait for a durable write to complete */ void log_buffer_flush_to_disk(bool sync) { - ut_ad(!srv_read_only_mode); log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync); } From a3d0d5fc33d3fd248f2d09faedd1ba6daf7a2596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Nov 2023 17:45:18 +0200 Subject: [PATCH 22/24] MDEV-26055: Improve adaptive flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a 10.5 backport from 10.6 commit 9593cccf285ee348fc9a2743c1ed7d24c768439b. Adaptive flushing is enabled by setting innodb_max_dirty_pages_pct_lwm>0 (not default) and innodb_adaptive_flushing=ON (default). There is also the parameter innodb_adaptive_flushing_lwm (default: 10 per cent of the log capacity). It should enable some adaptive flushing even when innodb_max_dirty_pages_pct_lwm=0. That is not being changed here. This idea was first presented by Inaam Rana several years ago, and I discussed it with Jean-François Gagné at FOSDEM 2023. buf_flush_page_cleaner(): When we are not near the log capacity limit (neither buf_flush_async_lsn nor buf_flush_sync_lsn are set), also try to move clean blocks from the buf_pool.LRU list to buf_pool.free or initiate writes (but not the eviction) of dirty blocks, until the remaining I/O capacity has been consumed. buf_flush_LRU_list_batch(): Add the parameter bool evict, to specify whether dirty least recently used pages (from buf_pool.LRU) should be evicted immediately after they have been written out. Callers outside buf_flush_page_cleaner() will pass evict=true, to retain the existing behaviour. buf_do_LRU_batch(): Add the parameter bool evict. Return counts of evicted and flushed pages. buf_flush_LRU(): Add the parameter bool evict. Assume that the caller holds buf_pool.mutex and will invoke buf_dblwr.flush_buffered_writes() afterwards. 
buf_flush_list_holding_mutex(): A low-level variant of buf_flush_list() whose caller must hold buf_pool.mutex and invoke buf_dblwr.flush_buffered_writes() afterwards. buf_flush_wait_batch_end_acquiring_mutex(): Remove. It is enough to have buf_flush_wait_batch_end(). page_cleaner_flush_pages_recommendation(): Avoid some floating-point arithmetic. buf_flush_page(), buf_flush_check_neighbor(), buf_flush_check_neighbors(), buf_flush_try_neighbors(): Rename the parameter "bool lru" to "bool evict". buf_free_from_unzip_LRU_list_batch(): Remove the parameter. Only actual page writes will contribute towards the limit. buf_LRU_free_page(): Evict freed pages of temporary tables. buf_pool.done_free: Broadcast whenever a block is freed (and buf_pool.try_LRU_scan is set). buf_pool_t::io_buf_t::reserve(): Retry indefinitely. During the test encryption.innochecksum we easily run out of these buffers for PAGE_COMPRESSED or ENCRYPTED pages. Tested by Matthias Leich and Axel Schwenke --- storage/innobase/buf/buf0buf.cc | 78 ++++-- storage/innobase/buf/buf0flu.cc | 401 ++++++++++++++--------------- storage/innobase/buf/buf0lru.cc | 66 +++-- storage/innobase/include/buf0buf.h | 51 ++-- storage/innobase/include/buf0flu.h | 13 +- storage/innobase/include/ut0new.h | 1 + 6 files changed, 311 insertions(+), 299 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index c510d320759..f762cf65a01 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -410,9 +410,11 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage, return (true); } + buf_tmp_buffer_t* slot; + if (node.space->purpose == FIL_TYPE_TEMPORARY && innodb_encrypt_temporary_tables) { - buf_tmp_buffer_t* slot = buf_pool.io_buf_reserve(); + slot = buf_pool.io_buf_reserve(); ut_a(slot); slot->allocate(); @@ -431,7 +433,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage, tablespace and page contains used key_version. 
This is true also for pages first compressed and then encrypted. */ - buf_tmp_buffer_t* slot; uint key_version = buf_page_get_key_version(dst_frame, flags); if (page_compressed && !key_version) { @@ -444,7 +445,6 @@ decompress: } slot = buf_pool.io_buf_reserve(); - ut_a(slot); slot->allocate(); decompress_with_slot: @@ -472,7 +472,6 @@ decrypt_failed: } slot = buf_pool.io_buf_reserve(); - ut_a(slot); slot->allocate(); ut_d(fil_page_type_validate(node.space, dst_frame)); @@ -1494,6 +1493,41 @@ inline bool buf_pool_t::realloc(buf_block_t *block) return(true); /* free_list was enough */ } +void buf_pool_t::io_buf_t::create(ulint n_slots) +{ + this->n_slots= n_slots; + slots= static_cast + (ut_malloc_nokey(n_slots * sizeof *slots)); + memset((void*) slots, 0, n_slots * sizeof *slots); +} + +void buf_pool_t::io_buf_t::close() +{ + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + { + aligned_free(s->crypt_buf); + aligned_free(s->comp_buf); + } + ut_free(slots); + slots= nullptr; + n_slots= 0; +} + +buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve() +{ + for (;;) + { + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + if (s->acquire()) + return s; + os_aio_wait_until_no_pending_writes(); + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + if (s->acquire()) + return s; + os_aio_wait_until_no_pending_reads(); + } +} + /** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status to the specified string. The format and the following parameters are the same as the ones used for printf(3). 
@@ -1564,15 +1598,18 @@ inline bool buf_pool_t::withdraw_blocks() block = next_block; } - mysql_mutex_unlock(&mutex); /* reserve free_list length */ if (UT_LIST_GET_LEN(withdraw) < withdraw_target) { ulint n_flushed = buf_flush_LRU( std::max(withdraw_target - UT_LIST_GET_LEN(withdraw), - srv_LRU_scan_depth)); - buf_flush_wait_batch_end_acquiring_mutex(true); + srv_LRU_scan_depth), + true); + mysql_mutex_unlock(&buf_pool.mutex); + buf_dblwr.flush_buffered_writes(); + mysql_mutex_lock(&buf_pool.mutex); + buf_flush_wait_batch_end(true); if (n_flushed) { MONITOR_INC_VALUE_CUMULATIVE( @@ -1586,12 +1623,12 @@ inline bool buf_pool_t::withdraw_blocks() /* relocate blocks/buddies in withdrawn area */ ulint count2 = 0; - mysql_mutex_lock(&mutex); - buf_page_t* bpage; - bpage = UT_LIST_GET_FIRST(LRU); - while (bpage != NULL) { - buf_page_t* next_bpage = UT_LIST_GET_NEXT(LRU, bpage); - if (bpage->zip.data != NULL + buf_pool_mutex_exit_forbid(); + for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage; + bpage; bpage = next_bpage) { + ut_ad(bpage->in_file()); + next_bpage = UT_LIST_GET_NEXT(LRU, bpage); + if (UNIV_LIKELY_NULL(bpage->zip.data) && will_be_withdrawn(bpage->zip.data) && bpage->can_relocate()) { buf_pool_mutex_exit_forbid(); @@ -2667,11 +2704,6 @@ buf_page_get_low( || (rw_latch == RW_X_LATCH) || (rw_latch == RW_SX_LATCH) || (rw_latch == RW_NO_LATCH)); - ut_ad(!allow_ibuf_merge - || mode == BUF_GET - || mode == BUF_GET_POSSIBLY_FREED - || mode == BUF_GET_IF_IN_POOL - || mode == BUF_GET_IF_IN_POOL_OR_WATCH); if (err) { *err = DB_SUCCESS; @@ -2685,15 +2717,15 @@ buf_page_get_low( replace any old pages, which were not evicted during DISCARD. Skip the assertion on space_page_size. */ break; - case BUF_PEEK_IF_IN_POOL: + default: + ut_ad(!allow_ibuf_merge); + ut_ad(mode == BUF_PEEK_IF_IN_POOL); + break; + case BUF_GET_POSSIBLY_FREED: case BUF_GET_IF_IN_POOL: /* The caller may pass a dummy page size, because it does not really matter. 
*/ break; - default: - ut_error; - case BUF_GET_POSSIBLY_FREED: - break; case BUF_GET_NO_LATCH: ut_ad(rw_latch == RW_NO_LATCH); /* fall through */ diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 3ee2ea97534..f95af0819b4 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -372,13 +372,12 @@ void buf_page_write_complete(const IORequest &request) if (request.is_LRU()) { buf_LRU_free_page(bpage, true); + buf_pool.try_LRU_scan= true; + pthread_cond_signal(&buf_pool.done_free); ut_ad(buf_pool.n_flush_LRU_); if (!--buf_pool.n_flush_LRU_) - { pthread_cond_broadcast(&buf_pool.done_flush_LRU); - pthread_cond_signal(&buf_pool.done_free); - } } else { @@ -805,10 +804,10 @@ inline void buf_pool_t::release_freed_page(buf_page_t *bpage) /** Write a flushable page from buf_pool to a file. buf_pool.mutex must be held. @param bpage buffer control block -@param lru true=buf_pool.LRU; false=buf_pool.flush_list +@param evict whether to evict the page on write completion @param space tablespace @return whether the page was flushed and buf_pool.mutex was released */ -static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) +static bool buf_flush_page(buf_page_t *bpage, bool evict, fil_space_t *space) { ut_ad(bpage->in_file()); ut_ad(bpage->ready_for_flush()); @@ -817,7 +816,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) ut_ad(space->purpose == FIL_TYPE_TABLESPACE || space->atomic_write_supported); ut_ad(space->referenced()); - ut_ad(lru || space != fil_system.temp_space); + ut_ad(evict || space != fil_system.temp_space); rw_lock_t *rw_lock; @@ -840,7 +839,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) if (status != buf_page_t::FREED) { - if (lru) + if (evict) buf_pool.n_flush_LRU_++; else buf_pool.n_flush_list_++; @@ -858,7 +857,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) the buffer pool or 
removed from flush_list or LRU_list. */ DBUG_PRINT("ib_buf", ("%s %u page %u:%u", - lru ? "LRU" : "flush_list", + evict ? "LRU" : "flush_list", bpage->id().space(), bpage->id().page_no())); ut_ad(bpage->io_fix() == BUF_IO_WRITE); ut_d(const lsn_t oldest_modification= bpage->oldest_modification()); @@ -868,7 +867,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) ut_ad(bpage->state() == (rw_lock ? BUF_BLOCK_FILE_PAGE : BUF_BLOCK_ZIP_PAGE)); ut_ad(ULINT_UNDEFINED > - (lru ? buf_pool.n_flush_LRU_ : buf_pool.n_flush_list_)); + (evict ? buf_pool.n_flush_LRU_ : buf_pool.n_flush_list_)); mysql_mutex_unlock(&buf_pool.mutex); buf_block_t *block= reinterpret_cast(bpage); @@ -895,7 +894,8 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 size_t orig_size; #endif - IORequest::Type type= lru ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC; + IORequest::Type type= evict + ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC; if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */ { @@ -934,7 +934,7 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 if (size != orig_size && space->punch_hole) - type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH; + type= evict ? IORequest::PUNCH_LRU : IORequest::PUNCH; #endif frame=page; } @@ -968,9 +968,10 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space) /** Check whether a page can be flushed from the buf_pool. 
@param id page identifier @param fold id.fold() -@param lru true=buf_pool.LRU; false=buf_pool.flush_list +@param evict true=buf_pool.LRU; false=buf_pool.flush_list @return whether the page can be flushed */ -static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru) +static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, + bool evict) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(fold == id.fold()); @@ -980,9 +981,9 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru) if (!bpage || buf_pool.watch_is_sentinel(*bpage)) return false; - /* We avoid flushing 'non-old' blocks in an LRU flush, because the + /* We avoid flushing 'non-old' blocks in an eviction flush, because the flushed blocks are soon freed */ - if (lru && !bpage->is_old()) + if (evict && !bpage->is_old()) return false; return bpage->oldest_modification() > 1 && bpage->ready_for_flush(); @@ -992,11 +993,11 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru) @param space tablespace @param id page identifier of a dirty page @param contiguous whether to consider contiguous areas of pages -@param lru true=buf_pool.LRU; false=buf_pool.flush_list +@param evict true=buf_pool.LRU; false=buf_pool.flush_list @return last page number that can be flushed */ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, page_id_t &id, bool contiguous, - bool lru) + bool evict) { ut_ad(id.page_no() < space.size + (space.physical_size() == 2048 ? 
1 @@ -1029,7 +1030,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, for (page_id_t i= id - 1;; --i) { fold--; - if (!buf_flush_check_neighbor(i, fold, lru)) + if (!buf_flush_check_neighbor(i, fold, evict)) { low= i + 1; break; @@ -1045,7 +1046,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, while (++i < high) { ++fold; - if (!buf_flush_check_neighbor(i, fold, lru)) + if (!buf_flush_check_neighbor(i, fold, evict)) break; } @@ -1120,20 +1121,20 @@ and also write zeroes or punch the hole for the freed ranges of pages. @param space tablespace @param page_id page identifier @param contiguous whether to consider contiguous areas of pages -@param lru true=buf_pool.LRU; false=buf_pool.flush_list +@param evict true=buf_pool.LRU; false=buf_pool.flush_list @param n_flushed number of pages flushed so far in this batch @param n_to_flush maximum number of pages we are allowed to flush @return number of pages flushed */ static ulint buf_flush_try_neighbors(fil_space_t *space, const page_id_t page_id, - bool contiguous, bool lru, + bool contiguous, bool evict, ulint n_flushed, ulint n_to_flush) { ut_ad(space->id == page_id.space()); ulint count= 0; page_id_t id= page_id; - page_id_t high= buf_flush_check_neighbors(*space, id, contiguous, lru); + page_id_t high= buf_flush_check_neighbors(*space, id, contiguous, evict); ut_ad(page_id >= id); ut_ad(page_id < high); @@ -1156,13 +1157,13 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, if (buf_page_t *bpage= buf_pool.page_hash_get_low(id, id_fold)) { ut_ad(bpage->in_file()); - /* We avoid flushing 'non-old' blocks in an LRU flush, + /* We avoid flushing 'non-old' blocks in an eviction flush, because the flushed blocks are soon freed */ - if (!lru || id == page_id || bpage->is_old()) + if (!evict || id == page_id || bpage->is_old()) { if (!buf_pool.watch_is_sentinel(*bpage) && - bpage->oldest_modification() > 1 && - bpage->ready_for_flush() && buf_flush_page(bpage, lru, 
space)) + bpage->oldest_modification() > 1 && bpage->ready_for_flush() && + buf_flush_page(bpage, evict, space)) { ++count; continue; @@ -1188,12 +1189,8 @@ This utility moves the uncompressed frames of pages to the free list. Note that this function does not actually flush any data to disk. It just detaches the uncompressed frames from the compressed pages at the tail of the unzip_LRU and puts those freed frames in the free list. -Note that it is a best effort attempt and it is not guaranteed that -after a call to this function there will be 'max' blocks in the free -list. -@param[in] max desired number of blocks in the free_list @return number of blocks moved to the free list. */ -static ulint buf_free_from_unzip_LRU_list_batch(ulint max) +static ulint buf_free_from_unzip_LRU_list_batch() { ulint scanned = 0; ulint count = 0; @@ -1203,7 +1200,6 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max) buf_block_t* block = UT_LIST_GET_LAST(buf_pool.unzip_LRU); while (block - && count < max && UT_LIST_GET_LEN(buf_pool.free) < srv_LRU_scan_depth && UT_LIST_GET_LEN(buf_pool.unzip_LRU) > UT_LIST_GET_LEN(buf_pool.LRU) / 10) { @@ -1281,10 +1277,13 @@ static void buf_flush_discard_page(buf_page_t *bpage) buf_LRU_free_page(bpage, true); } -/** Flush dirty blocks from the end of the LRU list. -@param max maximum number of blocks to make available in buf_pool.free -@param n counts of flushed and evicted pages */ -static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) +/** Flush dirty blocks from the end of buf_pool.LRU, +and move clean blocks to buf_pool.free. 
+@param max maximum number of blocks to flush +@param evict whether dirty pages are to be evicted after flushing them +@param n counts of flushed and evicted pages */ +static void buf_flush_LRU_list_batch(ulint max, bool evict, + flush_counters_t *n) { ulint scanned= 0; ulint free_limit= srv_LRU_scan_depth; @@ -1296,6 +1295,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN ? 0 : srv_flush_neighbors; fil_space_t *space= nullptr; + bool do_evict= evict; uint32_t last_space_id= FIL_NULL; static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency"); static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency"); @@ -1303,8 +1303,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU); bpage && ((UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_MIN_LEN && - UT_LIST_GET_LEN(buf_pool.free) < free_limit && - n->flushed + n->evicted < max) || + UT_LIST_GET_LEN(buf_pool.free) < free_limit) || recv_recovery_is_on()); ++scanned) { buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage); @@ -1320,8 +1319,8 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) } else if (oldest_modification > 1 && bpage->ready_for_flush()) { - /* Block is ready for flush. Dispatch an IO request. The IO - helper thread will put it on free list in IO completion routine. */ + /* Block is ready for flush. Dispatch an IO request. + If evict=true, the page will be evicted by buf_page_write_complete(). */ const page_id_t page_id(bpage->id()); const uint32_t space_id= page_id.space(); if (!space || space->id != space_id) @@ -1334,6 +1333,9 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) space->release(); auto p= buf_flush_space(space_id); space= p.first; + /* For the temporary tablespace, LRU flushing will always + evict pages upon completing the write. 
*/ + do_evict= evict || space == fil_system.temp_space; last_space_id= space_id; mysql_mutex_lock(&buf_pool.mutex); if (p.second) @@ -1355,11 +1357,13 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) { mysql_mutex_unlock(&buf_pool.mutex); n->flushed+= buf_flush_try_neighbors(space, page_id, neighbors == 1, - true, n->flushed, max); + do_evict, n->flushed, max); reacquire_mutex: mysql_mutex_lock(&buf_pool.mutex); } - else if (buf_flush_page(bpage, true, space)) + else if (n->flushed >= max && !recv_recovery_is_on()) + break; + else if (buf_flush_page(bpage, do_evict, space)) { ++n->flushed; goto reacquire_mutex; @@ -1393,27 +1397,24 @@ reacquire_mutex: /** Flush and move pages from LRU or unzip_LRU list to the free list. Whether LRU or unzip_LRU is used depends on the state of the system. -@param max maximum number of blocks to make available in buf_pool.free -@return number of flushed pages */ -static ulint buf_do_LRU_batch(ulint max) +@param max maximum number of blocks to flush +@param evict whether dirty pages are to be evicted after flushing them +@param n counts of flushed and evicted pages */ +static void buf_do_LRU_batch(ulint max, bool evict, flush_counters_t *n) { - const ulint n_unzip_LRU_evicted= buf_LRU_evict_from_unzip_LRU() - ? 
buf_free_from_unzip_LRU_list_batch(max) - : 0; - flush_counters_t n; - n.flushed= 0; - n.evicted= n_unzip_LRU_evicted; - buf_flush_LRU_list_batch(max, &n); + if (buf_LRU_evict_from_unzip_LRU()) + buf_free_from_unzip_LRU_list_batch(); + n->evicted= 0; + n->flushed= 0; + buf_flush_LRU_list_batch(max, evict, n); - if (const ulint evicted= n.evicted - n_unzip_LRU_evicted) - { - MONITOR_INC_VALUE_CUMULATIVE(MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT, - MONITOR_LRU_BATCH_EVICT_PAGES, - evicted); - } + mysql_mutex_assert_owner(&buf_pool.mutex); + buf_lru_flush_page_count+= n->flushed; - return n.flushed; + MONITOR_INC_VALUE_CUMULATIVE(MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, + MONITOR_LRU_BATCH_EVICT_COUNT, + MONITOR_LRU_BATCH_EVICT_PAGES, + n->evicted); } /** This utility flushes dirty blocks from the end of the flush_list. @@ -1469,7 +1470,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) from buf_pool.flush_list must adjust the hazard pointer. Note: A concurrent execution of buf_flush_list_space() may - terminate this scan prematurely. The buf_pool.n_flush_list() + terminate this scan prematurely. The buf_pool.n_flush_list_ should prevent multiple threads from executing buf_do_flush_list_batch() concurrently, but buf_flush_list_space() is ignoring that. */ @@ -1576,46 +1577,53 @@ void buf_flush_wait_batch_end(bool lru) } /** Write out dirty blocks from buf_pool.flush_list. +The caller must invoke buf_dblwr.flush_buffered_writes() +after releasing buf_pool.mutex. 
@param max_n wished maximum mumber of blocks flushed @param lsn buf_pool.get_oldest_modification(LSN_MAX) target @return the number of processed pages @retval 0 if a buf_pool.flush_list batch is already running */ -static ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, lsn_t lsn= LSN_MAX) +static ulint buf_flush_list_holding_mutex(ulint max_n= ULINT_UNDEFINED, + lsn_t lsn= LSN_MAX) { ut_ad(lsn); + mysql_mutex_assert_owner(&buf_pool.mutex); - if (buf_pool.n_flush_list()) + if (buf_pool.n_flush_list_) return 0; - mysql_mutex_lock(&buf_pool.mutex); - const bool running= buf_pool.n_flush_list_ != 0; /* FIXME: we are performing a dirty read of buf_pool.flush_list.count while not holding buf_pool.flush_list_mutex */ - if (running || !UT_LIST_GET_LEN(buf_pool.flush_list)) + if (!UT_LIST_GET_LEN(buf_pool.flush_list)) { - if (!running) - pthread_cond_broadcast(&buf_pool.done_flush_list); - mysql_mutex_unlock(&buf_pool.mutex); + pthread_cond_broadcast(&buf_pool.done_flush_list); return 0; } buf_pool.n_flush_list_++; const ulint n_flushed= buf_do_flush_list_batch(max_n, lsn); - const ulint n_flushing= --buf_pool.n_flush_list_; - - buf_pool.try_LRU_scan= true; - - mysql_mutex_unlock(&buf_pool.mutex); - - if (!n_flushing) + if (!--buf_pool.n_flush_list_) pthread_cond_broadcast(&buf_pool.done_flush_list); - buf_dblwr.flush_buffered_writes(); - DBUG_PRINT("ib_buf", ("flush_list completed, " ULINTPF " pages", n_flushed)); return n_flushed; } +/** Write out dirty blocks from buf_pool.flush_list. 
+@param max_n wished maximum number of blocks flushed +@param lsn buf_pool.get_oldest_modification(LSN_MAX) target +@return the number of processed pages +@retval 0 if a buf_pool.flush_list batch is already running */ +static ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, + lsn_t lsn= LSN_MAX) +{ + mysql_mutex_lock(&buf_pool.mutex); + ulint n= buf_flush_list_holding_mutex(max_n, lsn); + mysql_mutex_unlock(&buf_pool.mutex); + buf_dblwr.flush_buffered_writes(); + return n; +} + /** Try to flush all the dirty pages that belong to a given tablespace. @param space tablespace @param n_flushed number of pages written @@ -1716,7 +1724,7 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) mysql_mutex_unlock(&buf_pool.flush_list_mutex); buf_pool.try_LRU_scan= true; - + pthread_cond_broadcast(&buf_pool.done_free); mysql_mutex_unlock(&buf_pool.mutex); if (acquired) @@ -1730,43 +1738,41 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) return may_have_skipped; } -/** Write out dirty blocks from buf_pool.LRU. +/** Write out dirty blocks from buf_pool.LRU, +and move clean blocks to buf_pool.free. +The caller must invoke buf_dblwr.flush_buffered_writes() +after releasing buf_pool.mutex. @param max_n wished maximum mumber of blocks flushed -@return the number of processed pages +@param evict whether to evict pages after flushing +@return evict ? 
number of processed pages : number of pages written @retval 0 if a buf_pool.LRU batch is already running */ -ulint buf_flush_LRU(ulint max_n) +ulint buf_flush_LRU(ulint max_n, bool evict) { - if (buf_pool.n_flush_LRU()) - return 0; + mysql_mutex_assert_owner(&buf_pool.mutex); - log_buffer_flush_to_disk(true); - - mysql_mutex_lock(&buf_pool.mutex); - if (buf_pool.n_flush_LRU_) + if (evict) { - mysql_mutex_unlock(&buf_pool.mutex); - return 0; + if (buf_pool.n_flush_LRU_) + return 0; + buf_pool.n_flush_LRU_= 1; } - buf_pool.n_flush_LRU_++; - ulint n_flushed= buf_do_LRU_batch(max_n); + flush_counters_t n; + buf_do_LRU_batch(max_n, evict, &n); - const ulint n_flushing= --buf_pool.n_flush_LRU_; - - buf_pool.try_LRU_scan= true; - - mysql_mutex_unlock(&buf_pool.mutex); - - if (!n_flushing) + if (n.evicted) { - pthread_cond_broadcast(&buf_pool.done_flush_LRU); + buf_pool.try_LRU_scan= true; pthread_cond_signal(&buf_pool.done_free); } - buf_dblwr.flush_buffered_writes(); + if (!evict) + return n.flushed; - DBUG_PRINT("ib_buf", ("LRU flush completed, " ULINTPF " pages", n_flushed)); - return n_flushed; + if (!--buf_pool.n_flush_LRU_) + pthread_cond_broadcast(&buf_pool.done_flush_LRU); + + return n.evicted + n.flushed; } /** Initiate a log checkpoint, discarding the start of the log. @@ -1928,7 +1934,9 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) { mysql_mutex_unlock(&buf_pool.flush_list_mutex); ulint n_pages= buf_flush_list(srv_max_io_capacity, sync_lsn); - buf_flush_wait_batch_end_acquiring_mutex(false); + mysql_mutex_lock(&buf_pool.mutex); + buf_flush_wait_batch_end(false); + mysql_mutex_unlock(&buf_pool.mutex); if (n_pages) { MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE, @@ -1994,17 +2002,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) } } -/** Wait for pending flushes to complete. */ -void buf_flush_wait_batch_end_acquiring_mutex(bool lru) -{ - if (lru ? 
buf_pool.n_flush_LRU() : buf_pool.n_flush_list()) - { - mysql_mutex_lock(&buf_pool.mutex); - buf_flush_wait_batch_end(lru); - mysql_mutex_unlock(&buf_pool.mutex); - } -} - /** Conduct checkpoint-related flushing for innodb_flush_sync=ON, and try to initiate checkpoints until the target is met. @param lsn minimum value of buf_pool.get_oldest_modification(LSN_MAX) */ @@ -2116,8 +2113,9 @@ af_get_pct_for_lsn( / 7.5)); } -/** This function is called approximately once every second by the -page_cleaner thread if innodb_adaptive_flushing=ON. +/** This function is called approximately once every second by +buf_flush_page_cleaner() if innodb_max_dirty_pages_pct_lwm>0 +and innodb_adaptive_flushing=ON. Based on various factors it decides if there is a need to do flushing. @return number of pages recommended to be flushed @param last_pages_in number of pages flushed in previous batch @@ -2155,52 +2153,43 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in, n_pages= std::min(srv_io_capacity, dirty_blocks); } +func_exit: + page_cleaner.flush_pass++; return n_pages; } sum_pages += last_pages_in; - double time_elapsed = difftime(curr_time, prev_time); + const ulint time_elapsed = std::max(curr_time - prev_time, 1); - /* We update our variables every srv_flushing_avg_loops + /* We update our variables every innodb_flushing_avg_loops iterations to smooth out transition in workload. */ if (++n_iterations >= srv_flushing_avg_loops - || time_elapsed >= static_cast(srv_flushing_avg_loops)) { + || time_elapsed >= srv_flushing_avg_loops) { - if (time_elapsed < 1) { - time_elapsed = 1; - } - - avg_page_rate = static_cast( - ((static_cast(sum_pages) - / time_elapsed) - + static_cast(avg_page_rate)) / 2); + avg_page_rate = (sum_pages / time_elapsed + avg_page_rate) / 2; /* How much LSN we have generated since last call. 
*/ - lsn_rate = static_cast( - static_cast(cur_lsn - prev_lsn) - / time_elapsed); + lsn_rate = (cur_lsn - prev_lsn) / time_elapsed; lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2; - ulint flush_tm = page_cleaner.flush_time; - ulint flush_pass = page_cleaner.flush_pass; - - page_cleaner.flush_time = 0; - page_cleaner.flush_pass = 0; - - if (flush_pass) { - flush_tm /= flush_pass; + if (page_cleaner.flush_pass) { + page_cleaner.flush_time /= page_cleaner.flush_pass; } - MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME, flush_tm); - MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS, flush_pass); - prev_lsn = cur_lsn; prev_time = curr_time; - n_iterations = 0; + MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME, + page_cleaner.flush_time); + MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS, + page_cleaner.flush_pass); + page_cleaner.flush_time = 0; + page_cleaner.flush_pass = 0; + + n_iterations = 0; sum_pages = 0; } @@ -2250,7 +2239,7 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in, MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty); MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn); - return(n_pages); + goto func_exit; } /******************************************************************//** @@ -2281,7 +2270,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) if (UNIV_UNLIKELY(lsn_limit != 0)) { -furious_flush: + furious_flush: if (UNIV_LIKELY(srv_flush_sync)) { buf_flush_sync_for_checkpoint(lsn_limit); @@ -2299,7 +2288,8 @@ furious_flush: if (buf_pool.page_cleaner_idle() && (!UT_LIST_GET_LEN(buf_pool.flush_list) || srv_max_dirty_pages_pct_lwm == 0.0)) - my_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex); + my_cond_wait(&buf_pool.do_flush_list, + &buf_pool.flush_list_mutex.m_mutex); else my_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex, &abstime); @@ -2327,19 +2317,25 @@ furious_flush: /* wake up buf_flush_wait() */ pthread_cond_broadcast(&buf_pool.done_flush_list); } -unemployed: + 
unemployed: buf_flush_async_lsn= 0; + set_idle: buf_pool.page_cleaner_set_idle(true); - - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", continue;); - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", continue;); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); + end_of_batch: + buf_dblwr.flush_buffered_writes(); - if (!recv_recovery_is_on() && - !srv_startup_is_before_trx_rollback_phase && - srv_operation <= SRV_OPERATION_EXPORT_RESTORED) - log_checkpoint(); + do + { + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", continue;); + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", continue;); + + if (!recv_recovery_is_on() && + !srv_startup_is_before_trx_rollback_phase && + srv_operation <= SRV_OPERATION_EXPORT_RESTORED) + log_checkpoint(); + } + while (false); mysql_mutex_lock(&buf_pool.flush_list_mutex); continue; @@ -2390,57 +2386,40 @@ unemployed: if (!lsn_limit) lsn_limit= soft_lsn_limit; - ulint n_flushed; + ulint n_flushed= 0, n; if (UNIV_UNLIKELY(lsn_limit != 0)) { - n_flushed= buf_flush_list(srv_max_io_capacity, lsn_limit); - /* wake up buf_flush_wait() */ - pthread_cond_broadcast(&buf_pool.done_flush_list); - goto try_checkpoint; + n= srv_max_io_capacity; + goto background_flush; } else if (idle_flush || !srv_adaptive_flushing) { - n_flushed= buf_flush_list(srv_io_capacity); -try_checkpoint: - if (n_flushed) - { - MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, - MONITOR_FLUSH_BACKGROUND_COUNT, - MONITOR_FLUSH_BACKGROUND_PAGES, - n_flushed); -do_checkpoint: - /* The periodic log_checkpoint() call here makes it harder to - reproduce bugs in crash recovery or mariabackup --prepare, or - in code that writes the redo log records. Omitting the call - here should not affect correctness, because log_free_check() - should still be invoking checkpoints when needed. 
*/ - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", goto next;); - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto next;); - - if (!recv_recovery_is_on() - && srv_operation <= SRV_OPERATION_EXPORT_RESTORED) - log_checkpoint(); - } + n= srv_io_capacity; + lsn_limit= LSN_MAX; + background_flush: + mysql_mutex_lock(&buf_pool.mutex); + n_flushed= buf_flush_list_holding_mutex(n, lsn_limit); + /* wake up buf_flush_wait() */ + pthread_cond_broadcast(&buf_pool.done_flush_list); + MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_COUNT, + MONITOR_FLUSH_BACKGROUND_PAGES, + n_flushed); } - else if (ulint n= page_cleaner_flush_pages_recommendation(last_pages, - oldest_lsn, - dirty_blocks, - dirty_pct)) + else if ((n= page_cleaner_flush_pages_recommendation(last_pages, + oldest_lsn, + dirty_blocks, + dirty_pct)) != 0) { - page_cleaner.flush_pass++; const ulint tm= ut_time_ms(); - last_pages= n_flushed= buf_flush_list(n); + mysql_mutex_lock(&buf_pool.mutex); + last_pages= n_flushed= buf_flush_list_holding_mutex(n); page_cleaner.flush_time+= ut_time_ms() - tm; - - if (n_flushed) - { - MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, - MONITOR_FLUSH_ADAPTIVE_COUNT, - MONITOR_FLUSH_ADAPTIVE_PAGES, - n_flushed); - goto do_checkpoint; - } + MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_COUNT, + MONITOR_FLUSH_ADAPTIVE_PAGES, + n_flushed); } else if (buf_flush_async_lsn <= oldest_lsn) { @@ -2448,24 +2427,29 @@ do_checkpoint: goto unemployed; } -#ifndef DBUG_OFF -next: -#endif /* !DBUG_OFF */ - mysql_mutex_lock(&buf_pool.flush_list_mutex); + n= buf_flush_LRU(n >= n_flushed ? n - n_flushed : 0, false); + mysql_mutex_unlock(&buf_pool.mutex); + last_pages+= n; + + if (!idle_flush) + goto end_of_batch; /* when idle flushing kicks in page_cleaner is marked active. reset it back to idle since the it was made active as part of idle flushing stage. 
*/ - if (idle_flush) - buf_pool.page_cleaner_set_idle(true); + mysql_mutex_lock(&buf_pool.flush_list_mutex); + goto set_idle; } mysql_mutex_unlock(&buf_pool.flush_list_mutex); if (srv_fast_shutdown != 2) { - buf_flush_wait_batch_end_acquiring_mutex(true); - buf_flush_wait_batch_end_acquiring_mutex(false); + buf_dblwr.flush_buffered_writes(); + mysql_mutex_lock(&buf_pool.mutex); + buf_flush_wait_batch_end(true); + buf_flush_wait_batch_end(false); + mysql_mutex_unlock(&buf_pool.mutex); } mysql_mutex_lock(&buf_pool.flush_list_mutex); @@ -2520,20 +2504,23 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool() while (buf_pool.get_oldest_modification(0)) { mysql_mutex_unlock(&buf_pool.flush_list_mutex); - buf_flush_list(srv_max_io_capacity); - if (buf_pool.n_flush_list()) + mysql_mutex_lock(&buf_pool.mutex); + buf_flush_list_holding_mutex(srv_max_io_capacity); + if (buf_pool.n_flush_list_) { + mysql_mutex_unlock(&buf_pool.mutex); timespec abstime; service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, "Waiting to flush " ULINTPF " pages", buf_flush_list_length()); set_timespec(abstime, INNODB_EXTEND_TIMEOUT_INTERVAL / 2); + buf_dblwr.flush_buffered_writes(); mysql_mutex_lock(&buf_pool.mutex); while (buf_pool.n_flush_list_) my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex, &abstime); - mysql_mutex_unlock(&buf_pool.mutex); } + mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex); } diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 0e9f834f127..34d737b3103 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -413,6 +413,7 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) && recv_sys.apply_log_recs) { goto flush_lru; }); +get_mutex: mysql_mutex_lock(&buf_pool.mutex); got_mutex: buf_LRU_check_size_of_non_data_objects(); @@ -501,15 +502,18 @@ not_found: #ifndef DBUG_OFF flush_lru: #endif - if (!buf_flush_LRU(innodb_lru_flush_size)) { + 
mysql_mutex_lock(&buf_pool.mutex); + + if (!buf_flush_LRU(innodb_lru_flush_size, true)) { MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); ++flush_failures; } n_iterations++; - mysql_mutex_lock(&buf_pool.mutex); buf_pool.stat.LRU_waits++; - goto got_mutex; + mysql_mutex_unlock(&buf_pool.mutex); + buf_dblwr.flush_buffered_writes(); + goto get_mutex; } /** Move the LRU_old pointer so that the length of the old blocks list @@ -815,52 +819,60 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) const ulint fold = id.fold(); page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold); hash_lock->write_lock(); - lsn_t oldest_modification = bpage->oldest_modification_acquire(); + const lsn_t oldest_modification = bpage->oldest_modification_acquire(); if (UNIV_UNLIKELY(!bpage->can_relocate())) { /* Do not free buffer fixed and I/O-fixed blocks. */ goto func_exit; } - if (oldest_modification == 1) { + switch (oldest_modification) { + case 2: + ut_ad(id.space() == SRV_TMP_SPACE_ID); + ut_ad(!bpage->zip.data); + if (bpage->status != buf_page_t::FREED) { + goto func_exit; + } + bpage->clear_oldest_modification(); + break; + case 1: mysql_mutex_lock(&buf_pool.flush_list_mutex); - oldest_modification = bpage->oldest_modification(); - if (oldest_modification) { - ut_ad(oldest_modification == 1); + if (const lsn_t om = bpage->oldest_modification()) { + ut_ad(om == 1); buf_pool.delete_from_flush_list(bpage); } mysql_mutex_unlock(&buf_pool.flush_list_mutex); ut_ad(!bpage->oldest_modification()); - oldest_modification = 0; - } - - if (zip || !bpage->zip.data) { - /* This would completely free the block. */ - /* Do not completely free dirty blocks. 
*/ - - if (oldest_modification) { - goto func_exit; + /* fall through */ + case 0: + if (zip || !bpage->zip.data + || bpage->state() != BUF_BLOCK_FILE_PAGE) { + break; } - } else if (oldest_modification - && bpage->state() != BUF_BLOCK_FILE_PAGE) { -func_exit: - hash_lock->write_unlock(); - return(false); - - } else if (bpage->state() == BUF_BLOCK_FILE_PAGE) { - b = buf_page_alloc_descriptor(); +relocate_compressed: + b = static_cast(ut_zalloc_nokey(sizeof *b)); ut_a(b); mysql_mutex_lock(&buf_pool.flush_list_mutex); new (b) buf_page_t(*bpage); b->set_state(BUF_BLOCK_ZIP_PAGE); + break; + default: + if (zip || !bpage->zip.data + || bpage->state() != BUF_BLOCK_FILE_PAGE) { + /* This would completely free the block. */ + /* Do not completely free dirty blocks. */ +func_exit: + hash_lock->write_unlock(); + return(false); + } + goto relocate_compressed; } mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(bpage->in_file()); ut_ad(bpage->in_LRU_list); - DBUG_PRINT("ib_buf", ("free page %u:%u", - id.space(), id.page_no())); + DBUG_PRINT("ib_buf", ("free page %u:%u", id.space(), id.page_no())); ut_ad(bpage->can_relocate()); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index a84ea047a54..def7641db1d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1786,16 +1786,13 @@ public: MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex; /** Number of pending LRU flush; protected by mutex. 
*/ ulint n_flush_LRU_; - /** broadcast when n_flush_LRU reaches 0; protected by mutex */ + /** broadcast when n_flush_LRU_ reaches 0; protected by mutex */ pthread_cond_t done_flush_LRU; /** Number of pending flush_list flush; protected by mutex */ ulint n_flush_list_; - /** broadcast when n_flush_list reaches 0; protected by mutex */ + /** broadcast when n_flush_list_ reaches 0; protected by mutex */ pthread_cond_t done_flush_list; - TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; } - TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; } - /** @name General fields */ /* @{ */ ulint curr_pool_size; /*!< Current pool size in bytes */ @@ -1949,7 +1946,7 @@ public: last_activity_count= activity_count; } - // n_flush_LRU() + n_flush_list() + // n_flush_LRU_ + n_flush_list_ // is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list unsigned freed_page_clock;/*!< a sequence number used @@ -1979,7 +1976,8 @@ public: UT_LIST_BASE_NODE_T(buf_page_t) free; /*!< base node of the free block list */ - /** signaled each time when the free list grows; protected by mutex */ + /** signaled each time when the free list grows and + broadcast each time try_LRU_scan is set; protected by mutex */ pthread_cond_t done_free; UT_LIST_BASE_NODE_T(buf_page_t) withdraw; @@ -2045,9 +2043,9 @@ public: return any_pending; } /** @return total amount of pending I/O */ - ulint io_pending() const + TPOOL_SUPPRESS_TSAN ulint io_pending() const { - return n_pend_reads + n_flush_LRU() + n_flush_list(); + return n_pend_reads + n_flush_LRU_ + n_flush_list_; } private: @@ -2080,34 +2078,12 @@ private: /** array of slots */ buf_tmp_buffer_t *slots; - void create(ulint n_slots) - { - this->n_slots= n_slots; - slots= static_cast - (ut_malloc_nokey(n_slots * sizeof *slots)); - memset((void*) slots, 0, n_slots * sizeof *slots); - } + void create(ulint n_slots); - void close() - { - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - { - 
aligned_free(s->crypt_buf); - aligned_free(s->comp_buf); - } - ut_free(slots); - slots= nullptr; - n_slots= 0; - } + void close(); /** Reserve a buffer */ - buf_tmp_buffer_t *reserve() - { - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - if (s->acquire()) - return s; - return nullptr; - } + buf_tmp_buffer_t *reserve(); } io_buf; /** whether resize() is in the critical path */ @@ -2218,7 +2194,10 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn) /** Clear oldest_modification after removing from buf_pool.flush_list */ inline void buf_page_t::clear_oldest_modification() { - mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); +#ifdef SAFE_MUTEX + if (oldest_modification() != 2) + mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); +#endif /* SAFE_MUTEX */ ut_d(const auto state= state_); ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE || state == BUF_BLOCK_REMOVE_HASH); @@ -2337,7 +2316,7 @@ MEMORY: is not in free list, LRU list, or flush list, nor page hash table FILE_PAGE: space and offset are defined, is in page hash table if io_fix == BUF_IO_WRITE, - buf_pool.n_flush_LRU() || buf_pool.n_flush_list() + buf_pool.n_flush_LRU_ || buf_pool.n_flush_list_ (1) if buf_fix_count == 0, then is in LRU list, not in free list diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index c772f84147d..8f18a45cb7a 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -85,11 +85,15 @@ buf_flush_init_for_writing( bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr) MY_ATTRIBUTE((warn_unused_result)); -/** Write out dirty blocks from buf_pool.LRU. +/** Write out dirty blocks from buf_pool.LRU, +and move clean blocks to buf_pool.free. +The caller must invoke buf_dblwr.flush_buffered_writes() +after releasing buf_pool.mutex. 
@param max_n wished maximum mumber of blocks flushed -@return the number of processed pages +@param evict whether to evict pages after flushing +@return evict ? number of processed pages : number of pages written @retval 0 if a buf_pool.LRU batch is already running */ -ulint buf_flush_LRU(ulint max_n); +ulint buf_flush_LRU(ulint max_n, bool evict); /** Wait until a flush batch ends. @param lru true=buf_pool.LRU; false=buf_pool.flush_list */ @@ -119,9 +123,6 @@ buf_flush_note_modification( /** Initialize page_cleaner. */ ATTRIBUTE_COLD void buf_flush_page_cleaner_init(); -/** Wait for pending flushes to complete. */ -void buf_flush_wait_batch_end_acquiring_mutex(bool lru); - /** Flush the buffer pool on shutdown. */ ATTRIBUTE_COLD void buf_flush_buffer_pool(); diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h index 4c8d2cf7a61..01bf34dc856 100644 --- a/storage/innobase/include/ut0new.h +++ b/storage/innobase/include/ut0new.h @@ -843,6 +843,7 @@ constexpr const char* const auto_event_names[] = "buf0buf", "buf0dblwr", "buf0dump", + "buf0lru", "dict0dict", "dict0mem", "dict0stats", From 9a545eb67ca8a666b87fc0aa8d22fe0a01c4d9d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Nov 2023 17:45:37 +0200 Subject: [PATCH 23/24] MDEV-26055: Correct the formula for adaptive flushing This is a 10.5 backport of 10.6 commit d4265fbde587bd79b6fe3793225d3f4798ee955e. page_cleaner_flush_pages_recommendation(): If dirty_pct is between innodb_max_dirty_pages_pct_lwm and innodb_max_dirty_pages_pct, scale the effort relative to how close we are to innodb_max_dirty_pages_pct. The previous formula was missing a multiplication by 100. 
--- storage/innobase/buf/buf0flu.cc | 54 +++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f95af0819b4..77008312727 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2120,10 +2120,12 @@ Based on various factors it decides if there is a need to do flushing. @return number of pages recommended to be flushed @param last_pages_in number of pages flushed in previous batch @param oldest_lsn buf_pool.get_oldest_modification(0) +@param pct_lwm innodb_max_dirty_pages_pct_lwm, or 0 to ignore it @param dirty_blocks UT_LIST_GET_LEN(buf_pool.flush_list) @param dirty_pct 100*flush_list.count / (LRU.count + free.count) */ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in, lsn_t oldest_lsn, + double pct_lwm, ulint dirty_blocks, double dirty_pct) { @@ -2193,11 +2195,17 @@ func_exit: sum_pages = 0; } - const ulint pct_for_dirty = srv_max_dirty_pages_pct_lwm == 0 - ? (dirty_pct >= max_pct ? 100 : 0) - : static_cast - (max_pct > 0.0 ? 
dirty_pct / max_pct : dirty_pct); - ulint pct_total = std::max(pct_for_dirty, pct_for_lsn); + MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn); + + double total_ratio; + if (pct_lwm == 0.0 || max_pct == 0.0) { + total_ratio = 1; + } else { + total_ratio = std::max(double(pct_for_lsn) / 100, + (dirty_pct / max_pct)); + } + + MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, ulint(total_ratio * 100)); /* Estimate pages to be flushed for the lsn progress */ lsn_t target_lsn = oldest_lsn @@ -2223,7 +2231,7 @@ func_exit: pages_for_lsn = 1; } - n_pages = (ulint(double(srv_io_capacity) * double(pct_total) / 100.0) + n_pages = (ulint(double(srv_io_capacity) * total_ratio) + avg_page_rate + pages_for_lsn) / 3; if (n_pages > srv_max_io_capacity) { @@ -2236,8 +2244,6 @@ func_exit: MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate); MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate); - MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty); - MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn); goto func_exit; } @@ -2349,15 +2355,17 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) const double dirty_pct= double(dirty_blocks) * 100.0 / double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free)); - bool idle_flush= false; + double pct_lwm= 0.0; - if (lsn_limit || soft_lsn_limit); - else if (af_needed_for_redo(oldest_lsn)); - else if (srv_max_dirty_pages_pct_lwm != 0.0) + if (srv_max_dirty_pages_pct_lwm != 0.0) { + const double lwm= srv_max_dirty_pages_pct_lwm; const ulint activity_count= srv_get_activity_count(); if (activity_count != last_activity_count) + { last_activity_count= activity_count; + goto maybe_unemployed; + } else if (buf_pool.page_cleaner_idle() && buf_pool.n_pend_reads == 0) { /* reaching here means 3 things: @@ -2365,15 +2373,22 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) (no trx_t::commit activity) - page cleaner is idle (dirty_pct < srv_max_dirty_pages_pct_lwm) - there are no pending reads but 
there are dirty pages to flush */ - idle_flush= true; buf_pool.update_last_activity_count(activity_count); + pct_lwm= lwm; + } + else + { + maybe_unemployed: + const bool below{dirty_pct < pct_lwm}; + pct_lwm= 0.0; + if (below) + goto possibly_unemployed; } - - if (!idle_flush && dirty_pct < srv_max_dirty_pages_pct_lwm) - goto unemployed; } else if (dirty_pct < srv_max_buf_pool_modified_pct) - goto unemployed; + possibly_unemployed: + if (!lsn_limit && !soft_lsn_limit && !af_needed_for_redo(oldest_lsn)) + goto set_idle; if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit) lsn_limit= buf_flush_sync_lsn= 0; @@ -2393,7 +2408,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) n= srv_max_io_capacity; goto background_flush; } - else if (idle_flush || !srv_adaptive_flushing) + else if (pct_lwm != 0.0 || !srv_adaptive_flushing) { n= srv_io_capacity; lsn_limit= LSN_MAX; @@ -2409,6 +2424,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) } else if ((n= page_cleaner_flush_pages_recommendation(last_pages, oldest_lsn, + pct_lwm, dirty_blocks, dirty_pct)) != 0) { @@ -2431,7 +2447,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) mysql_mutex_unlock(&buf_pool.mutex); last_pages+= n; - if (!idle_flush) + if (pct_lwm == 0.0) goto end_of_batch; /* when idle flushing kicks in page_cleaner is marked active. From eb1f8b291911fee931acfd8883b4d6876d8689ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 17 Nov 2023 15:07:51 +0200 Subject: [PATCH 24/24] MDEV-32027 Opening all .ibd files on InnoDB startup can be slow dict_find_max_space_id(): Return SELECT MAX(SPACE) FROM SYS_TABLES. dict_check_tablespaces_and_store_max_id(): In the normal case (no encryption plugin has been loaded and the change buffer is empty), invoke dict_find_max_space_id() and do not open any .ibd files. If a std::set has been specified, open the files whose tablespace ID is mentioned. 
Else, open all data files that are identified by SYS_TABLES records. fil_ibd_open(): Remove a call to os_file_get_last_error() that can report a misleading error, such as EINVAL inside my_realpath() that is not an actual error. This could be invoked when a data file is found but the FSP_SPACE_FLAGS are incorrect, such as is the case for table test.td in ./mtr --mysqld=--innodb-buffer-pool-dump-at-shutdown=0 innodb.table_flags buf_load(): If any tablespaces could not be found, invoke dict_check_tablespaces_and_store_max_id() on the missing tablespaces. dict_load_tablespace(): Try to load the tablespace unless it was found to be futile. This fixes failures related to FTS_*.ibd files for FULLTEXT INDEX. btr_cur_t::search_leaf(): Prevent a crash when the tablespace does not exist. This was caught by the test innodb_fts.fts_concurrent_insert when the change to dict_load_tablespace() was not present. We modify a few tests to ensure that tables will not be loaded at startup. For some fault injection tests this means that the corrupted tables will not be loaded, because dict_load_tablespace() would perform stricter checks than dict_check_tablespaces_and_store_max_id(). 
Tested by: Matthias Leich Reviewed by: Thirunarayanan Balathandayuthapani --- .../innodb/r/row_format_redundant.result | 3 +- mysql-test/suite/innodb/r/table_flags.result | 12 +-- .../suite/innodb/t/row_format_redundant.opt | 2 + mysql-test/suite/innodb/t/table_flags.opt | 1 + mysql-test/suite/innodb/t/table_flags.test | 2 + mysql-test/suite/innodb_zip/r/restart.result | 37 --------- mysql-test/suite/innodb_zip/t/restart.opt | 7 +- storage/innobase/btr/btr0cur.cc | 26 +++--- storage/innobase/buf/buf0dump.cc | 18 +++- storage/innobase/dict/dict0load.cc | 82 +++++++++++-------- storage/innobase/fil/fil0fil.cc | 2 - storage/innobase/include/dict0load.h | 16 ++-- storage/innobase/srv/srv0start.cc | 12 +-- 13 files changed, 104 insertions(+), 116 deletions(-) diff --git a/mysql-test/suite/innodb/r/row_format_redundant.result b/mysql-test/suite/innodb/r/row_format_redundant.result index d95c37f18cc..e3356c633df 100644 --- a/mysql-test/suite/innodb/r/row_format_redundant.result +++ b/mysql-test/suite/innodb/r/row_format_redundant.result @@ -68,9 +68,8 @@ DROP TABLE t1; Warnings: Warning 1932 Table 'test.t1' doesn't exist in engine DROP TABLE t2,t3; -FOUND 6 /\[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b/ in mysqld.1.err +FOUND 5 /\[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. 
SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b/ in mysqld.1.err # restart -ib_buffer_pool ib_logfile0 ibdata1 db.opt diff --git a/mysql-test/suite/innodb/r/table_flags.result b/mysql-test/suite/innodb/r/table_flags.result index 779990351c6..cc32472f39c 100644 --- a/mysql-test/suite/innodb/r/table_flags.result +++ b/mysql-test/suite/innodb/r/table_flags.result @@ -101,13 +101,9 @@ ERROR 42S02: Table 'test.tc' doesn't exist in engine SELECT * FROM tc; ERROR 42S02: Table 'test.tc' doesn't exist in engine SHOW CREATE TABLE td; -Table Create Table -td CREATE TABLE `td` ( - `a` int(11) NOT NULL, - PRIMARY KEY (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci ROW_FORMAT=DYNAMIC +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB SELECT * FROM td; -a +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB SHOW CREATE TABLE tz; Table Create Table tz CREATE TABLE `tz` ( @@ -122,8 +118,8 @@ a 42 SHOW CREATE TABLE tp; ERROR 42S02: Table 'test.tp' doesn't exist in engine -FOUND 5 /InnoDB: Table test/t[cp] in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=(129|289|3873|1232[13]) SYS_TABLES\.N_COLS=2147483649/ in mysqld.1.err -FOUND 2 /InnoDB: Table test/tr in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=65 SYS_TABLES\.MIX_LEN=4294967295\b/ in mysqld.1.err +FOUND 3 /InnoDB: Table test/t[cp] in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=(129|289|3873|1232[13]) SYS_TABLES\.N_COLS=2147483649/ in mysqld.1.err +FOUND 1 /InnoDB: Table test/tr in InnoDB data dictionary contains invalid flags\. 
SYS_TABLES\.TYPE=65 SYS_TABLES\.MIX_LEN=4294967295\b/ in mysqld.1.err Restoring SYS_TABLES clustered index root page (8) # restart: with restart_parameters SHOW CREATE TABLE tr; diff --git a/mysql-test/suite/innodb/t/row_format_redundant.opt b/mysql-test/suite/innodb/t/row_format_redundant.opt index c44c611ed60..d1d93da09fb 100644 --- a/mysql-test/suite/innodb/t/row_format_redundant.opt +++ b/mysql-test/suite/innodb/t/row_format_redundant.opt @@ -1 +1,3 @@ --innodb-checksum-algorithm=crc32 +--skip-innodb-fast-shutdown +--skip-innodb-buffer-pool-dump-at-shutdown diff --git a/mysql-test/suite/innodb/t/table_flags.opt b/mysql-test/suite/innodb/t/table_flags.opt index bca674950d2..8f6c7db5f01 100644 --- a/mysql-test/suite/innodb/t/table_flags.opt +++ b/mysql-test/suite/innodb/t/table_flags.opt @@ -1,2 +1,3 @@ --innodb-checksum-algorithm=crc32 --skip-innodb-read-only-compressed +--skip-innodb-buffer-pool-dump-at-shutdown diff --git a/mysql-test/suite/innodb/t/table_flags.test b/mysql-test/suite/innodb/t/table_flags.test index 34204ae11f4..511d3c241ba 100644 --- a/mysql-test/suite/innodb/t/table_flags.test +++ b/mysql-test/suite/innodb/t/table_flags.test @@ -157,7 +157,9 @@ SHOW CREATE TABLE tr; SHOW CREATE TABLE tc; --error ER_NO_SUCH_TABLE_IN_ENGINE SELECT * FROM tc; +--error ER_GET_ERRNO SHOW CREATE TABLE td; +--error ER_GET_ERRNO SELECT * FROM td; # This table was converted to NO_ROLLBACK due to the SYS_TABLES.TYPE change. 
SHOW CREATE TABLE tz; diff --git a/mysql-test/suite/innodb_zip/r/restart.result b/mysql-test/suite/innodb_zip/r/restart.result index eb1bfe67c5d..8bd3f73f8e0 100644 --- a/mysql-test/suite/innodb_zip/r/restart.result +++ b/mysql-test/suite/innodb_zip/r/restart.result @@ -527,15 +527,6 @@ Variable_name Value innodb_file_per_table ON === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === Space_Name Page_Size Zip_Size Path -test/t4_restart DEFAULT DEFAULT MYSQLD_DATADIR/test/t4_restart.ibd -test/t6_restart#p#p0 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd -test/t6_restart#p#p1 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd -test/t7_restart#p#p0#sp#s0 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd -test/t7_restart#p#p0#sp#s1 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd -test/t5_restart DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd -test/t6_restart#p#p2 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd -test/t7_restart#p#p1#sp#s2 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd -test/t7_restart#p#p1#sp#s3 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd innodb_temporary DEFAULT DEFAULT MYSQLD_DATADIR/ibtmp1 SELECT count(*) FROM t5_restart; count(*) @@ -629,7 +620,6 @@ RENAME TABLE t6_restart TO t66_restart; RENAME TABLE t7_restart TO t77_restart; === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === Space_Name Page_Size Zip_Size Path -test/t4_restart DEFAULT DEFAULT MYSQLD_DATADIR/test/t4_restart.ibd test/t66_restart#p#p0 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p0.ibd test/t66_restart#p#p1 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p1.ibd test/t77_restart#p#p0#sp#s0 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s0.ibd @@ -728,15 +718,6 @@ Variable_name Value innodb_file_per_table ON === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === 
Space_Name Page_Size Zip_Size Path -test/t4_restart DEFAULT DEFAULT MYSQLD_DATADIR/test/t4_restart.ibd -test/t66_restart#p#p0 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p0.ibd -test/t66_restart#p#p1 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p1.ibd -test/t77_restart#p#p0#sp#s0 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s0.ibd -test/t77_restart#p#p0#sp#s1 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s1.ibd -test/t55_restart DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd -test/t66_restart#p#p2 DEFAULT 2048 MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p2.ibd -test/t77_restart#p#p1#sp#s2 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s2.ibd -test/t77_restart#p#p1#sp#s3 DEFAULT DEFAULT MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s3.ibd innodb_temporary DEFAULT DEFAULT MYSQLD_DATADIR/ibtmp1 INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); SELECT count(*) FROM t55_restart; @@ -863,15 +844,6 @@ t77_restart#p#p1#sp#s3.ibd # restart === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === Space_Name Page_Size Zip_Size Path -test/t4_restart DEFAULT DEFAULT MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd -test/t66_restart#p#p0 DEFAULT 2048 MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p0.ibd -test/t66_restart#p#p1 DEFAULT 2048 MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p1.ibd -test/t77_restart#p#p0#sp#s0 DEFAULT DEFAULT MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p0#sp#s0.ibd -test/t77_restart#p#p0#sp#s1 DEFAULT DEFAULT MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p0#sp#s1.ibd -test/t55_restart DEFAULT DEFAULT MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd -test/t66_restart#p#p2 DEFAULT 2048 MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p2.ibd -test/t77_restart#p#p1#sp#s2 DEFAULT DEFAULT MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p1#sp#s2.ibd -test/t77_restart#p#p1#sp#s3 DEFAULT DEFAULT MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p1#sp#s3.ibd innodb_temporary DEFAULT DEFAULT 
MYSQLD_DATADIR/ibtmp1 INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); SELECT count(*) FROM t4_restart; @@ -1002,15 +974,6 @@ t77_restart.par # restart === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === Space_Name Page_Size Zip_Size Path -test/t4_restart DEFAULT DEFAULT MYSQLD_DATADIR/test/t4_restart.ibd -test/t66_restart#p#p0 DEFAULT 2048 MYSQLD_DATADIR/test/t66_restart#p#p0.ibd -test/t66_restart#p#p1 DEFAULT 2048 MYSQLD_DATADIR/test/t66_restart#p#p1.ibd -test/t77_restart#p#p0#sp#s0 DEFAULT DEFAULT MYSQLD_DATADIR/test/t77_restart#p#p0#sp#s0.ibd -test/t77_restart#p#p0#sp#s1 DEFAULT DEFAULT MYSQLD_DATADIR/test/t77_restart#p#p0#sp#s1.ibd -test/t55_restart DEFAULT DEFAULT MYSQLD_DATADIR/test/t55_restart.ibd -test/t66_restart#p#p2 DEFAULT 2048 MYSQLD_DATADIR/test/t66_restart#p#p2.ibd -test/t77_restart#p#p1#sp#s2 DEFAULT DEFAULT MYSQLD_DATADIR/test/t77_restart#p#p1#sp#s2.ibd -test/t77_restart#p#p1#sp#s3 DEFAULT DEFAULT MYSQLD_DATADIR/test/t77_restart#p#p1#sp#s3.ibd innodb_temporary DEFAULT DEFAULT MYSQLD_DATADIR/ibtmp1 INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); SELECT count(*) FROM t4_restart; diff --git a/mysql-test/suite/innodb_zip/t/restart.opt b/mysql-test/suite/innodb_zip/t/restart.opt index d7564300c09..03fb84e94a6 100644 --- a/mysql-test/suite/innodb_zip/t/restart.opt +++ b/mysql-test/suite/innodb_zip/t/restart.opt @@ -1,2 +1,5 @@ ---loose-innodb-sys-tables ---loose-innodb-sys-tablespaces +--innodb-sys-tables +--innodb-sys-tablespaces +--skip-innodb-stats-persistent +--skip-innodb-buffer-pool-dump-at-shutdown +--skip-innodb-fast-shutdown diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 6f71ecee5e4..b05c9db787b 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1155,6 +1155,19 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, mtr_s_lock_index(index(), mtr); } + dberr_t err; + + if 
(!index()->table->space) + { + corrupted: + ut_ad("corrupted" == 0); // FIXME: remove this + err= DB_CORRUPTION; + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } + const ulint zip_size= index()->table->space->zip_size(); /* Start with the root page. */ @@ -1168,7 +1181,6 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, low_bytes= 0; ulint buf_mode= BUF_GET; search_loop: - dberr_t err; auto block_savepoint= mtr->get_savepoint(); buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr, @@ -1180,10 +1192,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, btr_decryption_failed(*index()); /* fall through */ default: - func_exit: - if (UNIV_LIKELY_NULL(heap)) - mem_heap_free(heap); - return err; + goto func_exit; case DB_SUCCESS: /* This must be a search to perform an insert, delete mark, or delete; try using the change buffer */ @@ -1250,12 +1259,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, btr_page_get_index_id(block->page.frame) != index()->id || fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || !fil_page_index_page_check(block->page.frame)) - { - corrupted: - ut_ad("corrupted" == 0); // FIXME: remove this - err= DB_CORRUPTION; - goto func_exit; - } + goto corrupted; page_cur.block= block; ut_ad(block == mtr->at_savepoint(block_savepoint)); diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc index 4703223adf4..f142263a0e4 100644 --- a/storage/innobase/buf/buf0dump.cc +++ b/storage/innobase/buf/buf0dump.cc @@ -33,7 +33,7 @@ Created April 08, 2011 Vasil Dimov #include "buf0rea.h" #include "buf0dump.h" -#include "dict0dict.h" +#include "dict0load.h" #include "os0file.h" #include "srv0srv.h" #include "srv0start.h" @@ -557,6 +557,22 @@ buf_load() if (!SHUTTING_DOWN()) { std::sort(dump, dump + dump_n); + std::set missing; + for (const page_id_t id : st_::span + (dump, dump_n)) { 
+ missing.emplace(id.space()); + } + for (std::set::iterator i = missing.begin(); + i != missing.end(); ) { + auto j = i++; + if (fil_space_t* space = fil_space_t::get(*j)) { + space->release(); + missing.erase(j); + } + } + if (!missing.empty()) { + dict_check_tablespaces_and_store_max_id(&missing); + } } /* Avoid calling the expensive fil_space_t::get() for each diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 129a2539341..d79d3eab8c3 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -33,8 +33,8 @@ Created 4/24/1996 Heikki Tuuri #include "dict0boot.h" #include "dict0crea.h" #include "dict0dict.h" -#include "dict0mem.h" #include "dict0stats.h" +#include "ibuf0ibuf.h" #include "fsp0file.h" #include "fts0priv.h" #include "mach0data.h" @@ -867,18 +867,30 @@ err_exit: return READ_OK; } -/** Check each tablespace found in the data dictionary. -Then look at each table defined in SYS_TABLES that has a space_id > 0 -to find all the file-per-table tablespaces. +/** @return SELECT MAX(space) FROM sys_tables */ +static uint32_t dict_find_max_space_id(btr_pcur_t *pcur, mtr_t *mtr) +{ + uint32_t max_space_id= 0; -In a crash recovery we already have some tablespace objects created from -processing the REDO log. We will compare the -space_id information in the data dictionary to what we find in the -tablespace file. In addition, more validation will be done if recovery -was needed and force_recovery is not set. + for (const rec_t *rec= dict_startscan_system(pcur, mtr, dict_sys.sys_tables); + rec; rec= dict_getnext_system_low(pcur, mtr)) + if (!dict_sys_tables_rec_check(rec)) + { + ulint len; + const byte *field= + rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__SPACE, &len); + ut_ad(len == 4); + max_space_id= std::max(max_space_id, mach_read_from_4(field)); + } -We also scan the biggest space id, and store it to fil_system. 
*/ -void dict_check_tablespaces_and_store_max_id() + return max_space_id; +} + +/** Check MAX(SPACE) FROM SYS_TABLES and store it in fil_system. +Open each data file if an encryption plugin has been loaded. + +@param spaces set of tablespace files to open */ +void dict_check_tablespaces_and_store_max_id(const std::set *spaces) { ulint max_space_id = 0; btr_pcur_t pcur; @@ -890,6 +902,12 @@ void dict_check_tablespaces_and_store_max_id() dict_sys.lock(SRW_LOCK_CALL); + if (!spaces && ibuf.empty + && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { + max_space_id = dict_find_max_space_id(&pcur, &mtr); + goto done; + } + for (const rec_t *rec = dict_startscan_system(&pcur, &mtr, dict_sys.sys_tables); rec; rec = dict_getnext_system_low(&pcur, &mtr)) { @@ -921,14 +939,6 @@ void dict_check_tablespaces_and_store_max_id() continue; } - if (flags2 & DICT_TF2_DISCARDED) { - sql_print_information("InnoDB: Ignoring tablespace" - " for %.*s because " - "the DISCARD flag is set", - static_cast(len), field); - continue; - } - /* For tables or partitions using .ibd files, the flag DICT_TF2_USE_FILE_PER_TABLE was not set in MIX_LEN before MySQL 5.6.5. The flag should not have been @@ -941,6 +951,19 @@ void dict_check_tablespaces_and_store_max_id() continue; } + if (spaces && spaces->find(uint32_t(space_id)) + == spaces->end()) { + continue; + } + + if (flags2 & DICT_TF2_DISCARDED) { + sql_print_information("InnoDB: Ignoring tablespace" + " for %.*s because " + "the DISCARD flag is set", + static_cast(len), field); + continue; + } + const span name{field, len}; char* filepath = fil_make_filepath(nullptr, name, @@ -973,6 +996,7 @@ void dict_check_tablespaces_and_store_max_id() ut_free(filepath); } +done: mtr.commit(); fil_set_max_space_id_if_bigger(max_space_id); @@ -2248,22 +2272,10 @@ dict_load_tablespace( /* The tablespace may already be open. 
*/ table->space = fil_space_for_table_exists_in_mem(table->space_id, table->flags); - if (table->space) { + if (table->space || table->file_unreadable) { return; } - if (ignore_err >= DICT_ERR_IGNORE_TABLESPACE) { - table->file_unreadable = true; - return; - } - - if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) { - ib::error() << "Failed to find tablespace for table " - << table->name << " in the cache. Attempting" - " to load the tablespace with space id " - << table->space_id; - } - /* Use the remote filepath if needed. This parameter is optional in the call to fil_ibd_open(). If not supplied, it will be built from the table->name. */ @@ -2286,6 +2298,12 @@ dict_load_tablespace( if (!table->space) { /* We failed to find a sensible tablespace file */ table->file_unreadable = true; + + if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) { + sql_print_error("InnoDB: Failed to load tablespace " + ULINTPF " for table %s", + table->space_id, table->name); + } } ut_free(filepath); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 02203ba3d7b..1e0b7a9c8c5 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2221,8 +2221,6 @@ func_exit: goto corrupted; } - os_file_get_last_error(operation_not_for_export, - !operation_not_for_export); if (!operation_not_for_export) { goto corrupted; } diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index f7d33d5b43b..3143aafdc6b 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -35,22 +35,16 @@ Created 4/24/1996 Heikki Tuuri #include "btr0types.h" #include +#include /** A stack of table names related through foreign key constraints */ typedef std::deque > dict_names_t; -/** Check each tablespace found in the data dictionary. -Then look at each table defined in SYS_TABLES that has a space_id > 0 -to find all the file-per-table tablespaces. 
+/** Check MAX(SPACE) FROM SYS_TABLES and store it in fil_system. +Open each data file if an encryption plugin has been loaded. -In a crash recovery we already have some tablespace objects created from -processing the REDO log. We will compare the -space_id information in the data dictionary to what we find in the -tablespace file. In addition, more validation will be done if recovery -was needed and force_recovery is not set. - -We also scan the biggest space id, and store it to fil_system. */ -void dict_check_tablespaces_and_store_max_id(); +@param spaces set of tablespace files to open */ +void dict_check_tablespaces_and_store_max_id(const std::set *spaces); /** Make sure the data_file_name is saved in dict_table_t if needed. @param[in,out] table Table object */ diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 96b6f2c83df..761330f59c9 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1819,21 +1819,13 @@ file_checked: } if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { - /* The following call is necessary for the insert + /* The following call is necessary for the change buffer to work with multiple tablespaces. We must know the mapping between space id's and .ibd file names. - In a crash recovery, we check that the info in data - dictionary is consistent with what we already know - about space id's from the calls to fil_ibd_load(). - - In a normal startup, we create the space objects for - every table in the InnoDB data dictionary that has - an .ibd file. - We also determine the maximum tablespace id used. */ - dict_check_tablespaces_and_store_max_id(); + dict_check_tablespaces_and_store_max_id(nullptr); } if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO