diff --git a/cmake/os/Windows.cmake b/cmake/os/Windows.cmake index ec1029e20ac..ac8779f3ad8 100644 --- a/cmake/os/Windows.cmake +++ b/cmake/os/Windows.cmake @@ -282,6 +282,7 @@ STRING(APPEND CMAKE_CXX_STANDARD_LIBRARIES " ws2_32.lib synchronization.lib") # System checks SET(SIGNAL_WITH_VIO_CLOSE 1) # Something that runtime team needs +SET(HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT 1) # IPv6 constants appeared in Vista SDK first. We need to define them in any case if they are # not in headers, to handle dual mode sockets correctly. diff --git a/config.h.cmake b/config.h.cmake index 15f02d0e916..6d2fbee55d1 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -457,6 +457,11 @@ /* This should mean case insensitive file system */ #cmakedefine FN_NO_CASE_SENSE 1 +/* Whether an anonymous private mapping is unaccessible after +madvise(MADV_DONTNEED) or madvise(MADV_FREE) or similar has been invoked; +this is the case with Microsoft Windows VirtualFree(MEM_DECOMMIT) */ +#cmakedefine HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT 1 + #cmakedefine HAVE_CHARSET_armscii8 1 #cmakedefine HAVE_CHARSET_ascii 1 #cmakedefine HAVE_CHARSET_big5 1 diff --git a/extra/mariabackup/innobackupex.cc b/extra/mariabackup/innobackupex.cc index 2de57a14c85..20e1654564a 100644 --- a/extra/mariabackup/innobackupex.cc +++ b/extra/mariabackup/innobackupex.cc @@ -44,8 +44,8 @@ Street, Fifth Floor, Boston, MA 02110-1335 USA #include #include #include -#include #include +#include "buf0buf.h" #include #include #include @@ -594,8 +594,9 @@ static struct my_option ibx_long_options[] = "--apply-log.", (uchar*) &ibx_xtrabackup_use_memory, (uchar*) &ibx_xtrabackup_use_memory, - 0, GET_LL, REQUIRED_ARG, 100*1024*1024L, 1024*1024L, LONGLONG_MAX, 0, - 1024*1024L, 0}, + 0, GET_LL, REQUIRED_ARG, 96 << 20, + innodb_buffer_pool_extent_size, SIZE_T_MAX, 0, + innodb_buffer_pool_extent_size, 0}, {"innodb-force-recovery", OPT_INNODB_FORCE_RECOVERY, "This option starts up the embedded InnoDB instance in crash " diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index bea6e064eb6..382536cdf88 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -1413,8 +1413,8 @@ struct my_option xb_client_options[]= { "The value is used in place of innodb_buffer_pool_size. " "This option is only relevant when the --prepare option is specified.", (G_PTR *) &xtrabackup_use_memory, (G_PTR *) &xtrabackup_use_memory, 0, - GET_LL, REQUIRED_ARG, 100 * 1024 * 1024L, 1024 * 1024L, LONGLONG_MAX, 0, - 1024 * 1024L, 0}, + GET_LL, REQUIRED_ARG, 96 << 20, innodb_buffer_pool_extent_size, + SIZE_T_MAX, 0, innodb_buffer_pool_extent_size, 0}, {"throttle", OPT_XTRA_THROTTLE, "limit count of IO operations (pairs of read&write) per second to IOS " "values (for '--backup')", @@ -2489,7 +2489,7 @@ static bool innodb_init_param() } srv_sys_space.normalize_size(); - srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift); + srv_lock_table_size = 5 * buf_pool.curr_size(); /* -------------- Log files ---------------------------*/ @@ -2511,11 +2511,8 @@ static bool innodb_init_param() srv_adaptive_flushing = FALSE; - /* We set srv_pool_size here in units of 1 kB. InnoDB internally - changes the value so that it becomes the number of database pages. */ - - srv_buf_pool_size = (ulint) xtrabackup_use_memory; - srv_buf_pool_chunk_unit = srv_buf_pool_size; + buf_pool.size_in_bytes_max = size_t(xtrabackup_use_memory); + buf_pool.size_in_bytes_requested = buf_pool.size_in_bytes_max; srv_n_read_io_threads = (uint) innobase_read_io_threads; srv_n_write_io_threads = (uint) innobase_write_io_threads; diff --git a/include/my_sys.h b/include/my_sys.h index 4a227db0bbe..5eca29b3274 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -173,9 +173,13 @@ extern void my_free(void *ptr); extern void *my_memdup(PSI_memory_key key, const void *from,size_t length,myf MyFlags); extern char *my_strdup(PSI_memory_key key, const char *from,myf MyFlags); extern char *my_strndup(PSI_memory_key key, const char *from, size_t length, myf MyFlags); +extern my_bool my_use_large_pages; -int my_init_large_pages(my_bool super_large_pages); +int my_init_large_pages(void); uchar *my_large_malloc(size_t *size, myf my_flags); +#if defined _WIN32 || defined HAVE_MMAP +char *my_large_virtual_alloc(size_t *size); +#endif void my_large_free(void *ptr, size_t size); void my_large_page_truncate(size_t *size); diff --git a/include/my_virtual_mem.h b/include/my_virtual_mem.h new file mode 100644 index 00000000000..56b2f03b329 --- /dev/null +++ b/include/my_virtual_mem.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2025, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#pragma once +/* + Functionality for handling virtual memory + (reserve, commit, decommit, release) +*/ +#include /*size_t*/ + +#ifdef __cplusplus +extern "C" { +#endif + +char *my_virtual_mem_reserve(size_t *size); +char *my_virtual_mem_commit(char *ptr, size_t size); +void my_virtual_mem_decommit(char *ptr, size_t size); +void my_virtual_mem_release(char *ptr, size_t size); + +#ifdef __cplusplus +} +#endif + diff --git a/mysql-test/main/large_pages.opt b/mysql-test/main/large_pages.opt index 857c9c1ecee..bff2c2f6898 100644 --- a/mysql-test/main/large_pages.opt +++ b/mysql-test/main/large_pages.opt @@ -1 +1 @@ ---large-pages +--large-pages --loose-innodb-buffer-pool-size-max=16m diff --git a/mysql-test/main/large_pages.result b/mysql-test/main/large_pages.result index 9d03e646ddf..c5e73f044a9 100644 --- a/mysql-test/main/large_pages.result +++ b/mysql-test/main/large_pages.result @@ -1,4 +1,5 @@ call mtr.add_suppression("\\[Warning\\] (mysqld|mariadbd): Couldn't allocate [0-9]+ bytes \\((Large/HugeTLB memory|MEMLOCK) page size [0-9]+\\).*"); +call mtr.add_suppression("\\[ERROR\\]*Lock Pages in memory access rights required.*"); create table t1 ( a int not null auto_increment, b char(16) not null, diff --git a/mysql-test/main/large_pages.test b/mysql-test/main/large_pages.test index 136273a2821..7c0f497c6d3 100644 --- a/mysql-test/main/large_pages.test +++ b/mysql-test/main/large_pages.test @@ -1,11 +1,9 @@ # Test of large pages (or at least the fallback to conventional allocation) -# Windows needs SeLockMemoryPrivilege ---source include/not_windows.inc --source include/have_innodb.inc call mtr.add_suppression("\\[Warning\\] (mysqld|mariadbd): Couldn't allocate [0-9]+ bytes \\((Large/HugeTLB memory|MEMLOCK) page size [0-9]+\\).*"); - +call mtr.add_suppression("\\[ERROR\\]*Lock Pages in memory access rights required.*"); create table t1 ( a int not null auto_increment, b char(16) not null, diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt index 70797302d01..788e69a612d 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt +++ b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt @@ -1,2 +1,2 @@ ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M --innodb_encrypt_temporary_tables=1 diff --git a/mysql-test/suite/innodb/r/buf_pool_resize_oom.result b/mysql-test/suite/innodb/r/buf_pool_resize_oom.result deleted file mode 100644 index 0bff75701a0..00000000000 --- a/mysql-test/suite/innodb/r/buf_pool_resize_oom.result +++ /dev/null @@ -1,8 +0,0 @@ -# -# Bug #21348684 SIGABRT DURING RESIZING THE INNODB BUFFER POOL -# ONLINE WITH MEMORY FULL CONDITION -# -call mtr.add_suppression("InnoDB: failed to allocate the chunk array"); -SET GLOBAL debug_dbug='+d,buf_pool_resize_chunk_null'; -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + 1048576; -# restart diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result index 1a8f16e4efb..ea053308b2e 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result @@ -1,4 +1,4 @@ -call mtr.add_suppression("InnoDB: Cannot allocate memory for the buffer pool"); +call mtr.add_suppression("InnoDB: Cannot map innodb_buffer_pool_size_max="); call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc.*"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); @@ -6,4 +6,4 @@ call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE faile # MDEV-25019 memory allocation failures during startup cause server failure in different, confusing ways # # restart: --debug_dbug=+d,ib_buf_chunk_init_fails -FOUND 1 /\[ERROR\] InnoDB: Cannot allocate memory for the buffer pool/ in mysqld.1.err +FOUND 1 /\[ERROR\] InnoDB: Cannot map innodb_buffer_pool_size_max=16m/ in mysqld.1.err diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result index cafa3f45eab..5db74a71636 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result @@ -1,27 +1,28 @@ +# +# MDEV-29445: Reorganize buffer pool (and remove chunks) +# set global innodb_adaptive_hash_index=ON; select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 8388608 +set global innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 10485760 -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; -set @`v_id` := 0; -set @`v_val` := 0; -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; -set global innodb_buffer_pool_size = 64 * 1024 * 1024 + 512 * 1024; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '67633152' -select @@innodb_buffer_pool_size; -@@innodb_buffer_pool_size -68157440 +create table t1 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +set global innodb_buffer_pool_size = 7340032; select count(val) from t1; count(val) 262144 set global innodb_adaptive_hash_index=OFF; -set global innodb_buffer_pool_size = 25165824; +set global innodb_buffer_pool_size = 24117248; +set global innodb_buffer_pool_size = 26214400; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '26214400' select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 25165824 @@ -29,4 +30,12 @@ select count(val) from t1; count(val) 262144 drop table t1; -drop view view0; +SET GLOBAL innodb_max_purge_lag_wait = 0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +SET GLOBAL innodb_buffer_pool_size = @old_innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result deleted file mode 100644 index d6b29060dc7..00000000000 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result +++ /dev/null @@ -1,14 +0,0 @@ -SET @save_size=@@innodb_buffer_pool_size; -# -# MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize -# after or during DROP TABLE -# -select @@innodb_buffer_pool_chunk_size; -@@innodb_buffer_pool_chunk_size -1048576 -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET GLOBAL innodb_buffer_pool_size=256*1024*1024; -DROP TABLE t1; -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size; -# End of 10.6 tests -SET GLOBAL innodb_buffer_pool_size=@save_size; diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result index 43d48023958..ec61bb9c0c3 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result @@ -4,7 +4,32 @@ SET GLOBAL innodb_limit_optimistic_insert_debug=2; SET GLOBAL innodb_buffer_pool_size=16777216; CREATE TEMPORARY TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 SELECT seq FROM seq_1_to_200; +SET GLOBAL innodb_max_purge_lag_wait=0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status +connect con1,localhost,root; +SET DEBUG_SYNC='buf_pool_shrink_before_wakeup SIGNAL blocked WAIT_FOR go'; SET GLOBAL innodb_buffer_pool_size=8388608; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status Withdrawing blocks. (505/505). +SET DEBUG_SYNC='now SIGNAL go'; +connection con1; +disconnect con1; +connection default; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status +SET DEBUG_SYNC=RESET; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; SELECT COUNT(*),MIN(a),MAX(a) FROM t1; COUNT(*) MIN(a) MAX(a) 200 1 200 diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result deleted file mode 100644 index efb652091bf..00000000000 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result +++ /dev/null @@ -1,26 +0,0 @@ -select @@innodb_buffer_pool_chunk_size; -@@innodb_buffer_pool_chunk_size -4194304 -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; -set @`v_id` := 0; -set @`v_val` := 0; -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; -set global innodb_buffer_pool_size = 7340032; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '7340032' -select count(val) from t1; -count(val) -262144 -set global innodb_buffer_pool_size = 16777216; -select count(val) from t1; -count(val) -262144 -drop table t1; -drop view view0; -set global innodb_buffer_pool_size = 2*1048576; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '2097152' -select @@innodb_buffer_pool_size; -@@innodb_buffer_pool_size -4194304 diff --git a/mysql-test/suite/innodb/r/lock_memory_debug.result b/mysql-test/suite/innodb/r/lock_memory_debug.result index 36d7433974e..90150805707 100644 --- a/mysql-test/suite/innodb/r/lock_memory_debug.result +++ b/mysql-test/suite/innodb/r/lock_memory_debug.result @@ -5,7 +5,7 @@ call mtr.add_suppression("\\[Warning\\] InnoDB: Over 67 percent of the buffer po CREATE TABLE t1 (col1 INT) ENGINE=InnoDB; INSERT INTO t1 VALUES (1),(2),(3),(4),(5); SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR -INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000; +INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g; ERROR HY000: The total number of locks exceeds the lock table size SELECT COUNT(*) FROM t1; COUNT(*) diff --git a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result index 6ae7c84807f..f33c0d0798e 100644 --- a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result +++ b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result @@ -1,7 +1,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); call mtr.add_suppression("InnoDB: Tablespace size stored in header is 768 pages, but the sum of data file sizes is 384 pages"); call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS of file"); -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=10M +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/recovery_memory.result b/mysql-test/suite/innodb/r/recovery_memory.result index 9aba9bccdb3..372adbf4f9d 100644 --- a/mysql-test/suite/innodb/r/recovery_memory.result +++ b/mysql-test/suite/innodb/r/recovery_memory.result @@ -12,7 +12,7 @@ END LOOP connect con1,localhost,root,,,; CALL dorepeat(); connection default; -# restart: --innodb_buffer_pool_size=5242880 +# restart: --innodb_buffer_pool_size=6m DROP TABLE t1; DROP PROCEDURE dorepeat; # diff --git a/mysql-test/suite/innodb/r/restart,16k.rdiff b/mysql-test/suite/innodb/r/restart,16k.rdiff deleted file mode 100644 index 3149b9aeab0..00000000000 --- a/mysql-test/suite/innodb/r/restart,16k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '5242879' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 5242880 for innodb_page_size=16384 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '5242879' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,32k.rdiff b/mysql-test/suite/innodb/r/restart,32k.rdiff deleted file mode 100644 index 3f00646cb37..00000000000 --- a/mysql-test/suite/innodb/r/restart,32k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '10485759' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 10485760 for innodb_page_size=32768 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '10485759' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,4k.rdiff b/mysql-test/suite/innodb/r/restart,4k.rdiff deleted file mode 100644 index b00c56ef81f..00000000000 --- a/mysql-test/suite/innodb/r/restart,4k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '2097151' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 2097152 for innodb_page_size=4096 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '2097151' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,64k.rdiff b/mysql-test/suite/innodb/r/restart,64k.rdiff deleted file mode 100644 index 886cbcde7d9..00000000000 --- a/mysql-test/suite/innodb/r/restart,64k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '20971519' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 20971520 for innodb_page_size=65536 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '20971519' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,8k.rdiff b/mysql-test/suite/innodb/r/restart,8k.rdiff deleted file mode 100644 index 40a9e1bad1c..00000000000 --- a/mysql-test/suite/innodb/r/restart,8k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '3145727' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 3145728 for innodb_page_size=8192 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '3145727' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart.result b/mysql-test/suite/innodb/r/restart.result index 95d79a0ab14..ba6a87b5ef7 100644 --- a/mysql-test/suite/innodb/r/restart.result +++ b/mysql-test/suite/innodb/r/restart.result @@ -30,19 +30,6 @@ SELECT * FROM td; a DROP TABLE tr,tc,td; # -# MDEV-27467 innodb to enfore the minimum innodb_buffer_pool_size in SET (resize) the same as startup -# -SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; -SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); -ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -SHOW WARNINGS; -Level Code Message -Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE -Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); -SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; -# # MDEV-27882 Innodb - recognise MySQL-8.0 innodb flags and give a specific error message # FOUND 1 /InnoDB: MySQL-8\.0 tablespace in \./ibdata1/ in attempted_start.err diff --git a/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt b/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt deleted file mode 100644 index 09fd8bd8e35..00000000000 --- a/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-size=8m --innodb-buffer-pool-chunk-size=1m diff --git a/mysql-test/suite/innodb/t/buf_pool_resize_oom.test b/mysql-test/suite/innodb/t/buf_pool_resize_oom.test deleted file mode 100644 index ea13129e8b1..00000000000 --- a/mysql-test/suite/innodb/t/buf_pool_resize_oom.test +++ /dev/null @@ -1,27 +0,0 @@ ---source include/have_innodb.inc ---source include/have_debug.inc ---source include/not_embedded.inc - ---echo # ---echo # Bug #21348684 SIGABRT DURING RESIZING THE INNODB BUFFER POOL ---echo # ONLINE WITH MEMORY FULL CONDITION ---echo # - -call mtr.add_suppression("InnoDB: failed to allocate the chunk array"); - -SET GLOBAL debug_dbug='+d,buf_pool_resize_chunk_null'; - ---disable_warnings -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + 1048576; ---enable_warnings - -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 27) = 'Resizing buffer pool failed' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; - ---source include/wait_condition.inc -# Restart the server, because the buffer pool would not necessarily be -# shrunk afterwards even if we request it. ---source include/restart_mysqld.inc diff --git a/mysql-test/suite/innodb/t/innodb-index-online.opt b/mysql-test/suite/innodb/t/innodb-index-online.opt index 1837463f07a..0aa69cc9e20 100644 --- a/mysql-test/suite/innodb/t/innodb-index-online.opt +++ b/mysql-test/suite/innodb/t/innodb-index-online.opt @@ -1,5 +1,5 @@ --loose-innodb-sort-buffer-size=64k --loose-innodb-online-alter-log-max-size=128k ---loose-innodb-buffer-pool-size=5M +--loose-innodb-buffer-pool-size=6M --loose-innodb-sys-indexes --loose-innodb-sys-fields diff --git a/mysql-test/suite/innodb/t/innodb-table-online-master.opt b/mysql-test/suite/innodb/t/innodb-table-online-master.opt index 1eafb5ac188..42f6b2832e5 100644 --- a/mysql-test/suite/innodb/t/innodb-table-online-master.opt +++ b/mysql-test/suite/innodb/t/innodb-table-online-master.opt @@ -1 +1 @@ ---innodb-sort-buffer-size=64k --innodb-online-alter-log-max-size=512k --innodb-buffer-pool-size=5M +--innodb-sort-buffer-size=64k --innodb-online-alter-log-max-size=512k --innodb-buffer-pool-size=6M diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt new file mode 100644 index 00000000000..95f86e59920 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=16m diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test index e8e070c5061..f082e7d7fd4 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test @@ -1,6 +1,6 @@ --source include/have_innodb.inc --source include/have_debug.inc -call mtr.add_suppression("InnoDB: Cannot allocate memory for the buffer pool"); +call mtr.add_suppression("InnoDB: Cannot map innodb_buffer_pool_size_max="); call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc.*"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); @@ -10,5 +10,5 @@ call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE faile let restart_parameters=--debug_dbug=+d,ib_buf_chunk_init_fails; --source include/restart_mysqld.inc let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; -let SEARCH_PATTERN=\[ERROR\] InnoDB: Cannot allocate memory for the buffer pool; +let SEARCH_PATTERN=\[ERROR\] InnoDB: Cannot map innodb_buffer_pool_size_max=16m; --source include/search_pattern_in_file.inc diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt index 39543543a53..19074aa9024 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt @@ -1,2 +1,3 @@ --innodb-buffer-pool-size=8M +--innodb-buffer-pool-size-max=24M --innodb-page-size=4k diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test index 051f38a572e..612a0c1be64 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test @@ -1,17 +1,13 @@ -# -# WL6117 : Resize the InnoDB Buffer Pool Online -# - --source include/have_innodb.inc ---source include/big_test.inc +--source include/have_sequence.inc -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; +--echo # +--echo # MDEV-29445: Reorganize buffer pool (and remove chunks) +--echo # --disable_query_log +call mtr.add_suppression("InnoDB: Over 67 percent of the buffer pool is occupied by lock heaps"); +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); set @old_innodb_buffer_pool_size = @@innodb_buffer_pool_size; set @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; --enable_query_log @@ -21,10 +17,9 @@ set global innodb_adaptive_hash_index=ON; select @@innodb_buffer_pool_size; # Expand buffer pool +set global innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; ---source include/wait_condition.inc - select @@innodb_buffer_pool_size; # fill buffer pool @@ -32,41 +27,48 @@ select @@innodb_buffer_pool_size; SET @save_innodb_read_only_compressed=@@GLOBAL.innodb_read_only_compressed; SET GLOBAL innodb_read_only_compressed=OFF; --enable_query_log -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; +create table t1 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -set @`v_id` := 0; -set @`v_val` := 0; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; -# 2^18 == 262144 records -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; --disable_query_log SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; --enable_query_log -# Shrink buffer pool -set global innodb_buffer_pool_size = 64 * 1024 * 1024 + 512 * 1024; ---source include/wait_condition.inc - -select @@innodb_buffer_pool_size; +# Attempt to shrink the buffer pool. This may occasionally fail. +--error 0,ER_WRONG_USAGE +set global innodb_buffer_pool_size = 7340032; select count(val) from t1; set global innodb_adaptive_hash_index=OFF; -# Expand buffer pool to 24MB -set global innodb_buffer_pool_size = 25165824; ---source include/wait_condition.inc +# Expand buffer pool to 23 and then 24 MiB (requesting 25 MiB) +set global innodb_buffer_pool_size = 24117248; +set global innodb_buffer_pool_size = 26214400; select @@innodb_buffer_pool_size; select count(val) from t1; drop table t1; -drop view view0; ---disable_query_log -set global innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; -set global innodb_buffer_pool_size = @old_innodb_buffer_pool_size; ---enable_query_log +SET GLOBAL innodb_max_purge_lag_wait = 0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; + +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +let $wait_condition = +SELECT variable_value = 0 +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; --source include/wait_condition.inc +# this may occasionally be aborted on a heavily loaded builder +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_buffer_pool_size = @old_innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt deleted file mode 100644 index 72f055d3b58..00000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt +++ /dev/null @@ -1,2 +0,0 @@ ---innodb-buffer-pool-chunk-size=1M ---loose-skip-innodb-disable-resize_buffer_pool_debug diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test deleted file mode 100644 index db5da2924fa..00000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test +++ /dev/null @@ -1,28 +0,0 @@ ---source include/have_innodb.inc ---source include/big_test.inc - -SET @save_size=@@innodb_buffer_pool_size; - -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; - ---echo # ---echo # MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize ---echo # after or during DROP TABLE ---echo # - -select @@innodb_buffer_pool_chunk_size; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET GLOBAL innodb_buffer_pool_size=256*1024*1024; -DROP TABLE t1; ---source include/wait_condition.inc -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size; ---source include/wait_condition.inc - ---echo # End of 10.6 tests - -SET GLOBAL innodb_buffer_pool_size=@save_size; ---source include/wait_condition.inc diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt deleted file mode 100644 index dca040ea893..00000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-size=8M --innodb-buffer-pool-chunk-size=2M diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt new file mode 100644 index 00000000000..95f86e59920 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=16m diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test index c49ae451638..59afed24521 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test @@ -1,24 +1,43 @@ --source include/have_innodb.inc --source include/have_sequence.inc --source include/have_debug.inc +--source include/have_debug_sync.inc SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; SET @save_size=@@GLOBAL.innodb_buffer_pool_size; SET GLOBAL innodb_limit_optimistic_insert_debug=2; - SET GLOBAL innodb_buffer_pool_size=16777216; CREATE TEMPORARY TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 SELECT seq FROM seq_1_to_200; -SET GLOBAL innodb_buffer_pool_size=8388608; +# Flush the buffer pool to prevent +# "innodb_buffer_pool_size change aborted" error with ./mtr --repeat=3 +SET GLOBAL innodb_max_purge_lag_wait=0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; ---source include/wait_condition.inc +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +connect con1,localhost,root; +SET DEBUG_SYNC='buf_pool_shrink_before_wakeup SIGNAL blocked WAIT_FOR go'; +send SET GLOBAL innodb_buffer_pool_size=8388608; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +# adjust for 32-bit +--replace_result 504/504 505/505 +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +SET DEBUG_SYNC='now SIGNAL go'; +connection con1; +reap; +disconnect con1; +connection default; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +SET DEBUG_SYNC=RESET; + +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; SELECT COUNT(*),MIN(a),MAX(a) FROM t1; DROP TEMPORARY TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt deleted file mode 100644 index ade197de338..00000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt +++ /dev/null @@ -1,3 +0,0 @@ ---innodb-buffer-pool-size=16M ---innodb-buffer-pool-chunk-size=4M ---innodb-page-size=4k diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test deleted file mode 100644 index 78db6bf0d5a..00000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test +++ /dev/null @@ -1,61 +0,0 @@ -# -# WL6117 : Resize the InnoDB Buffer Pool Online -# (innodb_buffer_pool_chunk_size used case) -# - ---source include/have_innodb.inc ---source include/big_test.inc - -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - ---disable_query_log -set @old_innodb_buffer_pool_size = @@innodb_buffer_pool_size; ---enable_query_log - -select @@innodb_buffer_pool_chunk_size; - -# fill buffer pool ---disable_query_log -SET @save_innodb_read_only_compressed=@@GLOBAL.innodb_read_only_compressed; -SET GLOBAL innodb_read_only_compressed=OFF; ---enable_query_log -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; - -set @`v_id` := 0; -set @`v_val` := 0; - -# 2^18 == 262144 records -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; ---disable_query_log -SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; ---enable_query_log - -# Shrink buffer pool to 7MB -set global innodb_buffer_pool_size = 7340032; ---source include/wait_condition.inc - -select count(val) from t1; - -# Expand buffer pool to 16MB -set global innodb_buffer_pool_size = 16777216; ---source include/wait_condition.inc - -select count(val) from t1; - -drop table t1; -drop view view0; - -# Try to shrink buffer pool to smaller than chunk size -set global innodb_buffer_pool_size = 2*1048576; ---source include/wait_condition.inc -select @@innodb_buffer_pool_size; - ---disable_query_log -set global innodb_buffer_pool_size = @old_innodb_buffer_pool_size; ---enable_query_log ---source include/wait_condition.inc diff --git a/mysql-test/suite/innodb/t/lock_memory_debug.opt b/mysql-test/suite/innodb/t/lock_memory_debug.opt index 67c8423cf2a..184ec4096a8 100644 --- a/mysql-test/suite/innodb/t/lock_memory_debug.opt +++ b/mysql-test/suite/innodb/t/lock_memory_debug.opt @@ -1 +1 @@ ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M diff --git a/mysql-test/suite/innodb/t/lock_memory_debug.test b/mysql-test/suite/innodb/t/lock_memory_debug.test index 588356f7fa4..58a76740dcb 100644 --- a/mysql-test/suite/innodb/t/lock_memory_debug.test +++ b/mysql-test/suite/innodb/t/lock_memory_debug.test @@ -15,7 +15,7 @@ INSERT INTO t1 VALUES (1),(2),(3),(4),(5); --error ER_LOCK_TABLE_FULL SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR -INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000; +INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g; SELECT COUNT(*) FROM t1; diff --git a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test index 4358ccfa1ca..a9b567bfbb3 100644 --- a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test +++ b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test @@ -73,7 +73,7 @@ print OUT chr(0); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=10M +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' diff --git a/mysql-test/suite/innodb/t/mem_pressure.opt b/mysql-test/suite/innodb/t/mem_pressure.opt new file mode 100644 index 00000000000..8c6cf8729eb --- /dev/null +++ b/mysql-test/suite/innodb/t/mem_pressure.opt @@ -0,0 +1,2 @@ +--innodb-buffer-pool-size-max=17m +--innodb-buffer-pool-size=17m diff --git a/mysql-test/suite/innodb/t/mem_pressure.test b/mysql-test/suite/innodb/t/mem_pressure.test index 5dbcea439a3..578d6d44841 100644 --- a/mysql-test/suite/innodb/t/mem_pressure.test +++ b/mysql-test/suite/innodb/t/mem_pressure.test @@ -1,5 +1,4 @@ --source include/have_debug.inc ---source include/have_cgroupv2.inc --source include/not_embedded.inc --source include/have_innodb.inc --source include/have_sequence.inc diff --git a/mysql-test/suite/innodb/t/purge_secondary.opt b/mysql-test/suite/innodb/t/purge_secondary.opt index 2821c98397c..fd3b12811e8 100644 --- a/mysql-test/suite/innodb/t/purge_secondary.opt +++ b/mysql-test/suite/innodb/t/purge_secondary.opt @@ -1,4 +1,4 @@ --innodb-sys-tablestats ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M --innodb_monitor_enable=module_buffer --skip-innodb-stats-persistent diff --git a/mysql-test/suite/innodb/t/recovery_memory.test b/mysql-test/suite/innodb/t/recovery_memory.test index 06101377f10..51c0ce73b78 100644 --- a/mysql-test/suite/innodb/t/recovery_memory.test +++ b/mysql-test/suite/innodb/t/recovery_memory.test @@ -22,7 +22,7 @@ send CALL dorepeat(); connection default; sleep 10; let $shutdown_timeout=0; -let $restart_parameters=--innodb_buffer_pool_size=5242880; +let $restart_parameters=--innodb_buffer_pool_size=6m; --source include/restart_mysqld.inc DROP TABLE t1; DROP PROCEDURE dorepeat; @@ -33,11 +33,11 @@ DROP PROCEDURE dorepeat; --echo # if ($have_debug) { SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; -let $restart_parameters=--innodb_buffer_pool_size=5242880 --debug_dbug=+d,ibuf_init_corrupt; +let $restart_parameters=--innodb_buffer_pool_size=6m --debug_dbug=+d,ibuf_init_corrupt; } if (!$have_debug) { --echo SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; -let $restart_parameters=--innodb_buffer_pool_size=5242880; +let $restart_parameters=--innodb_buffer_pool_size=6m; } CREATE TABLE t1(f1 INT NOT NULL)ENGINE=InnoDB; INSERT INTO t1 SELECT * FROM seq_1_to_65536; diff --git a/mysql-test/suite/innodb/t/restart.opt b/mysql-test/suite/innodb/t/restart.opt deleted file mode 100644 index ce43e89cb2b..00000000000 --- a/mysql-test/suite/innodb/t/restart.opt +++ /dev/null @@ -1,2 +0,0 @@ ---loose-innodb_disable_resize_buffer_pool_debug=0 ---innodb-buffer-pool-chunk-size=1M diff --git a/mysql-test/suite/innodb/t/restart.test b/mysql-test/suite/innodb/t/restart.test index 727353b2492..fc6c2bfabd5 100644 --- a/mysql-test/suite/innodb/t/restart.test +++ b/mysql-test/suite/innodb/t/restart.test @@ -92,31 +92,6 @@ SELECT * FROM tc; SELECT * FROM td; DROP TABLE tr,tc,td; ---echo # ---echo # MDEV-27467 innodb to enfore the minimum innodb_buffer_pool_size in SET (resize) the same as startup ---echo # - -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - ---disable_cursor_protocol -SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; -SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; ---enable_cursor_protocol ---error ER_WRONG_VALUE_FOR_VAR -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); - -SHOW WARNINGS; - -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - ---source include/wait_condition.inc - -SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; - --echo # --echo # MDEV-27882 Innodb - recognise MySQL-8.0 innodb flags and give a specific error message --echo # diff --git a/mysql-test/suite/innodb/t/update_time-master.opt b/mysql-test/suite/innodb/t/update_time-master.opt deleted file mode 100644 index f0fd647546d..00000000000 --- a/mysql-test/suite/innodb/t/update_time-master.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-size=5M diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result index e5dd6820420..6e4dad90127 100644 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result @@ -1,16 +1,17 @@ SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; -'#---------------------BS_STVARS_022_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) -1 -1 Expected '#---------------------BS_STVARS_022_02----------------------#' -SET @@GLOBAL.innodb_buffer_pool_size=10485760; -Expected succeeded -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) +SELECT @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size_max +8388608 +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max +1 +SET GLOBAL innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max + 1048576; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '9437184' +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max 1 -1 Expected '#---------------------BS_STVARS_022_03----------------------#' SELECT @@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -18,10 +19,6 @@ WHERE VARIABLE_NAME='innodb_buffer_pool_size'; @@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE 1 1 Expected -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) -1 -1 Expected SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_size'; @@ -50,4 +47,7 @@ COUNT(@@GLOBAL.innodb_buffer_pool_size) 1 Expected SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; ERROR 42S22: Unknown column 'innodb_buffer_pool_size' in 'SELECT' -# restart +SET GLOBAL innodb_buffer_pool_size = @start_buffer_pool_size; +SELECT @@innodb_buffer_pool_size = @start_buffer_pool_size; +@@innodb_buffer_pool_size = @start_buffer_pool_size +1 diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff index 185abfd6f14..1aa3de321c0 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff @@ -24,7 +24,7 @@ VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Size of a single memory chunk for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means autosize this variable based on buffer pool size. + VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 0 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 4294967295 @@ -40,7 +40,35 @@ VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 -@@ -215,7 +215,7 @@ +@@ -203,10 +203,10 @@ + SESSION_VALUE NULL + DEFAULT_VALUE 134217728 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT The size of the memory buffer InnoDB uses to cache data and indexes of its tables. + NUMERIC_MIN_VALUE 2097152 +-NUMERIC_MAX_VALUE 18446744073701163008 ++NUMERIC_MAX_VALUE 4292870144 + NUMERIC_BLOCK_SIZE 1048576 + ENUM_VALUE_LIST NULL + READ_ONLY NO +@@ -215,11 +215,11 @@ + SESSION_VALUE NULL + DEFAULT_VALUE 0 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Maximum innodb_buffer_pool_size + NUMERIC_MIN_VALUE 0 +-NUMERIC_MAX_VALUE 18446744073701163008 +-NUMERIC_BLOCK_SIZE 8388608 ++NUMERIC_MAX_VALUE 4292870144 ++NUMERIC_BLOCK_SIZE 2097152 + ENUM_VALUE_LIST NULL + READ_ONLY YES + COMMAND_LINE_ARGUMENT REQUIRED +@@ -227,7 +227,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -49,7 +77,7 @@ VARIABLE_COMMENT A number between [0, 100] that tells how oftern buffer pool dump status in percentages should be printed. E.g. 10 means that buffer pool dump status is printed when every 10% of number of buffer pool pages are dumped. Default is 0 (only start and end status is printed). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -335,7 +335,7 @@ +@@ -347,7 +347,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 5 VARIABLE_SCOPE GLOBAL @@ -58,7 +86,7 @@ VARIABLE_COMMENT If the compression failure rate of a table is greater than this number more padding is added to the pages to reduce the failures. A value of zero implies no padding NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -359,7 +359,7 @@ +@@ -371,7 +371,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 50 VARIABLE_SCOPE GLOBAL @@ -67,7 +95,7 @@ VARIABLE_COMMENT Percentage of empty space on a data page that can be reserved to make the page compressible. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 75 -@@ -647,7 +647,7 @@ +@@ -659,7 +659,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 600 VARIABLE_SCOPE GLOBAL @@ -76,7 +104,7 @@ VARIABLE_COMMENT Maximum number of seconds that semaphore times out in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 4294967295 -@@ -695,7 +695,7 @@ +@@ -707,7 +707,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -85,7 +113,7 @@ VARIABLE_COMMENT Number of iterations over which the background flushing is averaged. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1000 -@@ -719,7 +719,7 @@ +@@ -731,7 +731,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -94,7 +122,7 @@ VARIABLE_COMMENT Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 3 -@@ -743,7 +743,7 @@ +@@ -755,7 +755,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -103,7 +131,7 @@ VARIABLE_COMMENT Set to 0 (don't flush neighbors from buffer pool), 1 (flush contiguous neighbors from buffer pool) or 2 (flush neighbors from buffer pool), when flushing a block NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -779,7 +779,7 @@ +@@ -791,7 +791,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -112,7 +140,7 @@ VARIABLE_COMMENT Helps to save your data in case the disk image of the database becomes corrupt. Value 5 can return bogus data, and 6 can permanently corrupt data. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6 -@@ -803,10 +803,10 @@ +@@ -815,10 +815,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 8000000 VARIABLE_SCOPE GLOBAL @@ -125,7 +153,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -839,7 +839,7 @@ +@@ -851,7 +851,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 84 VARIABLE_SCOPE GLOBAL @@ -134,7 +162,7 @@ VARIABLE_COMMENT InnoDB Fulltext search maximum token size in characters NUMERIC_MIN_VALUE 10 NUMERIC_MAX_VALUE 84 -@@ -851,7 +851,7 @@ +@@ -863,7 +863,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 3 VARIABLE_SCOPE GLOBAL @@ -143,7 +171,7 @@ VARIABLE_COMMENT InnoDB Fulltext search minimum token size in characters NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 16 -@@ -863,7 +863,7 @@ +@@ -875,7 +875,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000 VARIABLE_SCOPE GLOBAL @@ -152,7 +180,7 @@ VARIABLE_COMMENT InnoDB Fulltext search number of words to optimize for each optimize table call NUMERIC_MIN_VALUE 1000 NUMERIC_MAX_VALUE 10000 -@@ -875,10 +875,10 @@ +@@ -887,10 +887,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000000000 VARIABLE_SCOPE GLOBAL @@ -165,7 +193,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -899,7 +899,7 @@ +@@ -911,7 +911,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -174,7 +202,7 @@ VARIABLE_COMMENT InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -911,10 +911,10 @@ +@@ -923,10 +923,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 640000000 VARIABLE_SCOPE GLOBAL @@ -187,7 +215,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -959,7 +959,7 @@ +@@ -971,7 +971,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 200 VARIABLE_SCOPE GLOBAL @@ -196,7 +224,7 @@ VARIABLE_COMMENT Number of IOPs the server can do. Tunes the background IO rate NUMERIC_MIN_VALUE 100 NUMERIC_MAX_VALUE 4294967295 -@@ -971,7 +971,7 @@ +@@ -983,7 +983,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 4294967295 VARIABLE_SCOPE GLOBAL @@ -205,7 +233,7 @@ VARIABLE_COMMENT Limit to which innodb_io_capacity can be inflated. NUMERIC_MIN_VALUE 100 NUMERIC_MAX_VALUE 4294967295 -@@ -1091,10 +1091,10 @@ +@@ -1103,10 +1103,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 32 VARIABLE_SCOPE GLOBAL @@ -218,7 +246,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1103,10 +1103,10 @@ +@@ -1115,10 +1115,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 1536 VARIABLE_SCOPE GLOBAL @@ -231,7 +259,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1139,10 +1139,10 @@ +@@ -1151,10 +1151,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -244,7 +272,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1151,7 +1151,7 @@ +@@ -1163,7 +1163,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -253,7 +281,7 @@ VARIABLE_COMMENT Maximum delay of user threads in micro-seconds NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 10000000 -@@ -1283,10 +1283,10 @@ +@@ -1295,10 +1295,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -266,7 +294,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1307,7 +1307,7 @@ +@@ -1319,7 +1319,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 16384 VARIABLE_SCOPE GLOBAL @@ -275,7 +303,7 @@ VARIABLE_COMMENT Page size to use for all InnoDB tablespaces. NUMERIC_MIN_VALUE 4096 NUMERIC_MAX_VALUE 65536 -@@ -1343,7 +1343,7 @@ +@@ -1355,7 +1355,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 127 VARIABLE_SCOPE GLOBAL @@ -284,7 +312,7 @@ VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 5000 -@@ -1355,7 +1355,7 @@ +@@ -1367,7 +1367,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -293,7 +321,7 @@ VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -1391,7 +1391,7 @@ +@@ -1403,7 +1403,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 56 VARIABLE_SCOPE GLOBAL @@ -302,7 +330,7 @@ VARIABLE_COMMENT Number of pages that must be accessed sequentially for InnoDB to trigger a readahead. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -1475,7 +1475,7 @@ +@@ -1487,7 +1487,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1048576 VARIABLE_SCOPE GLOBAL @@ -311,7 +339,7 @@ VARIABLE_COMMENT Memory buffer size for index creation NUMERIC_MIN_VALUE 65536 NUMERIC_MAX_VALUE 67108864 -@@ -1643,10 +1643,10 @@ +@@ -1655,10 +1655,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 0f84e3e17ba..1fddbe3fc1e 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -5,6 +5,7 @@ variable_name not in ( 'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_use_native_aio', # default value depends on OS 'innodb_log_file_buffering', # only available on Linux and Windows +'innodb_buffer_pool_size_auto_min', # only available on Linux for now 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing order by variable_name; VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING @@ -96,7 +97,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Size of a single memory chunk for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means autosize this variable based on buffer pool size. +VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 1048576 @@ -206,11 +207,23 @@ VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT The size of the memory buffer InnoDB uses to cache data and indexes of its tables. NUMERIC_MIN_VALUE 2097152 -NUMERIC_MAX_VALUE 9223372036854775807 +NUMERIC_MAX_VALUE 18446744073701163008 NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BUFFER_POOL_SIZE_MAX +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Maximum innodb_buffer_pool_size +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073701163008 +NUMERIC_BLOCK_SIZE 8388608 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUF_DUMP_STATUS_FREQUENCY SESSION_VALUE NULL DEFAULT_VALUE 0 diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt deleted file mode 100644 index aa536bf0070..00000000000 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-chunk-size=2M diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt new file mode 100644 index 00000000000..373ccf8732e --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=8m diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test index dada2a9a455..1807ffaada1 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test @@ -24,35 +24,19 @@ --source include/have_innodb.inc -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; ---echo '#---------------------BS_STVARS_022_01----------------------#' -#################################################################### -# Displaying default value # -#################################################################### -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - - --echo '#---------------------BS_STVARS_022_02----------------------#' #################################################################### # Check if Value can set # #################################################################### -SET @@GLOBAL.innodb_buffer_pool_size=10485760; ---echo Expected succeeded ---source include/wait_condition.inc - -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - - - +--enable_warnings +SELECT @@GLOBAL.innodb_buffer_pool_size_max; +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +SET GLOBAL innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max + 1048576; +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +--disable_warnings --echo '#---------------------BS_STVARS_022_03----------------------#' ################################################################# @@ -66,9 +50,6 @@ WHERE VARIABLE_NAME='innodb_buffer_pool_size'; --enable_warnings --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - --disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -76,8 +57,6 @@ WHERE VARIABLE_NAME='innodb_buffer_pool_size'; --enable_warnings --echo 1 Expected - - --echo '#---------------------BS_STVARS_022_04----------------------#' ################################################################################ # Check if accessing variable with and without GLOBAL point to same variable # @@ -111,4 +90,6 @@ SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; # Restore the original buffer pool size. ---source include/restart_mysqld.inc +SET GLOBAL innodb_buffer_pool_size = @start_buffer_pool_size; + +SELECT @@innodb_buffer_pool_size = @start_buffer_pool_size; diff --git a/mysql-test/suite/sys_vars/t/sysvars_innodb.test b/mysql-test/suite/sys_vars/t/sysvars_innodb.test index 2680e442da4..7c4345cce25 100644 --- a/mysql-test/suite/sys_vars/t/sysvars_innodb.test +++ b/mysql-test/suite/sys_vars/t/sysvars_innodb.test @@ -12,5 +12,6 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP 'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_use_native_aio', # default value depends on OS 'innodb_log_file_buffering', # only available on Linux and Windows + 'innodb_buffer_pool_size_auto_min', # only available on Linux for now 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing order by variable_name; diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 5ab44931de3..ceae31f07eb 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -46,7 +46,8 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c my_rdtsc.c psi_noop.c my_atomic_writes.c my_cpu.c my_likely.c my_largepage.c - file_logger.c my_dlerror.c crc32/crc32c.cc) + file_logger.c my_dlerror.c crc32/crc32c.cc + my_virtual_mem.c) IF (WIN32) SET (MYSYS_SOURCES ${MYSYS_SOURCES} diff --git a/mysys/my_largepage.c b/mysys/my_largepage.c index 71527a9bc27..240c8e84fc7 100644 --- a/mysys/my_largepage.c +++ b/mysys/my_largepage.c @@ -35,17 +35,11 @@ extern int memcntl(caddr_t, size_t, int, caddr_t, int, int); #endif /* __sun__ ... */ #endif /* HAVE_SOLARIS_LARGE_PAGES */ -#if defined(_WIN32) -static size_t my_large_page_size; -#define HAVE_LARGE_PAGES -#elif defined(HAVE_MMAP) -#define HAVE_LARGE_PAGES -#endif -#ifdef HAVE_LARGE_PAGES -static my_bool my_use_large_pages= 0; -#else -#define my_use_large_pages 0 +my_bool my_use_large_pages; + +#ifdef _WIN32 +static size_t my_large_page_size; #endif #if defined(HAVE_GETPAGESIZES) || defined(__linux__) @@ -172,7 +166,7 @@ static void my_get_large_page_sizes(size_t sizes[]) @retval a large page size that is valid on this system or 0 if no large page size possible. */ -#if defined(HAVE_MMAP) && !defined(_WIN32) +#ifndef _WIN32 static size_t my_next_large_page_size(size_t sz, int *start) { DBUG_ENTER("my_next_large_page_size"); @@ -188,11 +182,12 @@ static size_t my_next_large_page_size(size_t sz, int *start) } DBUG_RETURN(0); } -#endif /* defined(MMAP) || !defined(_WIN32) */ +#endif -int my_init_large_pages(my_bool super_large_pages) +int my_init_large_pages(void) { + my_use_large_pages= 1; #ifdef _WIN32 if (!my_obtain_privilege(SE_LOCK_MEMORY_NAME)) { @@ -200,19 +195,15 @@ int my_init_large_pages(my_bool super_large_pages) "Lock Pages in memory access rights required for use with" " large-pages, see https://mariadb.com/kb/en/library/" "mariadb-memory-allocation/#huge-pages", MYF(MY_WME)); + my_use_large_pages= 0; } my_large_page_size= GetLargePageMinimum(); #endif - my_use_large_pages= 1; my_get_large_page_sizes(my_large_page_sizes); -#ifndef HAVE_LARGE_PAGES - my_printf_error(EE_OUTOFMEMORY, "No large page support on this platform", - MYF(MY_WME)); -#endif - #ifdef HAVE_SOLARIS_LARGE_PAGES + extern my_bool opt_super_large_pages; /* tell the kernel that we want to use 4/256MB page for heap storage and also for the stack. We use 4 MByte as default and if the @@ -222,9 +213,15 @@ int my_init_large_pages(my_bool super_large_pages) measured in a number of GBytes. We use as big pages as possible which isn't bigger than the above desired page sizes. + + Note: This refers to some implementations of the SPARC ISA, + where the supported page sizes are + 8KiB, 64KiB, 512KiB, 4MiB, 32MiB, 256MiB, 2GiB, and 16GiB. + On implementations of the AMD64 ISA, the available page sizes + should be 4KiB, 2MiB, and 1GiB. */ int nelem= 0; - size_t max_desired_page_size= (super_large_pages ? 256 : 4) * 1024 * 1024; + size_t max_desired_page_size= opt_super_large_pages ? 256 << 20 : 4 << 20; size_t max_page_size= my_next_large_page_size(max_desired_page_size, &nelem); if (max_page_size > 0) @@ -426,6 +423,118 @@ uchar *my_large_malloc(size_t *size, myf my_flags) DBUG_RETURN(ptr); } +#ifdef _WIN32 +/** + Special large pages allocator, with possibility to commit to allocating + more memory later. + Every implementation returns a zero filled buffer here. +*/ +char *my_large_virtual_alloc(size_t *size) +{ + char *ptr; + DBUG_ENTER("my_large_virtual_alloc"); + + if (my_use_large_pages) + { + size_t s= *size; + s= MY_ALIGN(s, (size_t) my_large_page_size); + ptr= VirtualAlloc(NULL, s, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, + PAGE_READWRITE); + if (ptr) + { + *size= s; + DBUG_RETURN(ptr); + } + } + + DBUG_RETURN(VirtualAlloc(NULL, *size, MEM_RESERVE, PAGE_READWRITE)); +} +#elif defined HAVE_MMAP +/** + Special large pages allocator, with possibility to commit to allocating + more memory later. + Every implementation returns a zero filled buffer here. +*/ +char *my_large_mmap(size_t *size, int prot) +{ + char *ptr; + DBUG_ENTER("my_large_virtual_alloc"); + + if (my_use_large_pages) + { + size_t large_page_size; + int page_i= 0; + prot= PROT_READ | PROT_WRITE; + + while ((large_page_size= my_next_large_page_size(*size, &page_i)) != 0) + { + int mapflag= MAP_PRIVATE | +# ifdef MAP_POPULATE + MAP_POPULATE | +# endif +# if defined MAP_HUGETLB /* linux 2.6.32 */ + MAP_HUGETLB | +# if defined MAP_HUGE_SHIFT /* Linux-3.8+ */ + my_bit_log2_size_t(large_page_size) << MAP_HUGE_SHIFT | +# else +# warning "No explicit large page (HUGETLB pages) support in Linux < 3.8" +# endif +# elif defined MAP_ALIGNED + MAP_ALIGNED(my_bit_log2_size_t(large_page_size)) | +# if defined MAP_ALIGNED_SUPER + MAP_ALIGNED_SUPER | +# endif +# endif + OS_MAP_ANON; + + size_t aligned_size= MY_ALIGN(*size, (size_t) large_page_size); + ptr= mmap(NULL, aligned_size, prot, mapflag, -1, 0); + if (ptr == (void*) -1) + { + ptr= NULL; + /* try next smaller memory size */ + if (errno == ENOMEM) + continue; + + /* other errors are more serious */ + break; + } + else /* success */ + { + /* + we do need to record the adjustment so that munmap gets called with + the right size. This is only the case for HUGETLB pages. + */ + *size= aligned_size; + DBUG_RETURN(ptr); + } + } + } + + ptr= mmap(NULL, *size, prot, +# ifdef MAP_NORESERVE + MAP_NORESERVE | +# endif + MAP_PRIVATE | OS_MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + ptr= NULL; + } + + DBUG_RETURN(ptr); +} + +/** + Special large pages allocator, with possibility to commit to allocating + more memory later. + Every implementation returns a zero filled buffer here. +*/ +char *my_large_virtual_alloc(size_t *size) +{ + return my_large_mmap(size, PROT_READ | PROT_WRITE); +} +#endif /** General large pages deallocator. @@ -482,7 +591,7 @@ void my_large_free(void *ptr, size_t size) #endif /* memory_sanitizer */ #else my_free_lock(ptr); -#endif /* HAVE_MMMAP */ +#endif /* HAVE_MMAP */ DBUG_VOID_RETURN; } diff --git a/mysys/my_virtual_mem.c b/mysys/my_virtual_mem.c new file mode 100644 index 00000000000..47e3a29788a --- /dev/null +++ b/mysys/my_virtual_mem.c @@ -0,0 +1,207 @@ +/* Copyright (c) 2025, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +#include +#include +#ifdef _AIX +# include +#endif + +/* + Functionality for handling virtual memory + + - reserve range, + - commit memory (within reserved range) + - decommit previously commited memory + - release range + + Not every OS has a "reserve" functionality, i.e it is not always + possible to reserve memory larger than swap or RAM for example. + + We try to respect use_large_pages setting, on Windows and Linux +*/ +#ifndef _WIN32 +char *my_large_mmap(size_t *size, int prot); +#endif + +char *my_virtual_mem_reserve(size_t *size) +{ +#ifdef _WIN32 + DWORD flags= my_use_large_pages + ? MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT + : MEM_RESERVE; + char *ptr= VirtualAlloc(NULL, *size, flags, PAGE_READWRITE); + if (!ptr && (flags & MEM_LARGE_PAGES)) + { + /* Try without large pages */ + ptr= VirtualAlloc(NULL, *size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (!ptr) + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size); + } + return ptr; +#else + return my_large_mmap(size, PROT_NONE); +#endif +} + +#if defined _WIN32 && !defined DBUG_OFF +static my_bool is_memory_committed(char *ptr, size_t size) +{ + MEMORY_BASIC_INFORMATION mbi; + if (VirtualQuery(ptr, &mbi, sizeof mbi) == 0) + DBUG_ASSERT(0); + return !!(mbi.State & MEM_COMMIT); +} +#endif + +char *my_virtual_mem_commit(char *ptr, size_t size) +{ + DBUG_ASSERT(ptr); +#ifdef _WIN32 + if (my_use_large_pages) + { + DBUG_ASSERT(is_memory_committed(ptr, size)); + } + else + { + void *p= VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE); + DBUG_ASSERT(p == ptr); + if (!p) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + return NULL; + } + } +#else + if (my_use_large_pages) + /* my_large_mmap() already created a read/write mapping. */; + else + { +# ifdef _AIX + /* + MAP_FIXED does not not work on IBM AIX in the way does works elsewhere. + Apparently, it is not possible to mmap(2) a range that is already in use, + at least not by default. + + mprotect(2) is the fallback, it can't communicate out-of-memory + conditions, but it looks like overcommitting is not possible on + AIX anyway. + */ + if (mprotect(ptr, size, PROT_READ | PROT_WRITE)) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + return NULL; + } +# else + void *p= 0; + const int flags= +# ifdef MAP_POPULATE + MAP_POPULATE | +# endif + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + p= mmap(ptr, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (p == MAP_FAILED) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + return NULL; + } + DBUG_ASSERT(p == ptr); +# if defined MADV_FREE_REUSABLE && defined MADV_FREE_REUSE /* Apple macOS */ + madvise(ptr, size, MADV_FREE_REUSE); /* cancel MADV_FREE_REUSABLE */ +# endif +# endif + } +#endif + update_malloc_size(size, 0); + return ptr; +} + +void my_virtual_mem_decommit(char *ptr, size_t size) +{ +#ifdef _WIN32 + DBUG_ASSERT(is_memory_committed(ptr, size)); +# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT +# error "VirtualFree(MEM_DECOMMIT) will not allow subsequent reads!" +# endif + if (!my_use_large_pages) + { + if (!VirtualFree(ptr, size, MEM_DECOMMIT)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, + GetLastError()); + DBUG_ASSERT(0); + } + } +#else + const int prot= +# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* + In InnoDB, buf_pool_t::page_guess() may deference pointers to + this, assuming that either the original contents or zeroed + contents is available. + */ + PROT_READ +# else + /* We will explicitly mark the memory unaccessible. */ + PROT_NONE +# endif + ; +# ifdef _AIX + disclaim(ptr, size, DISCLAIM_ZEROMEM); +# elif defined __linux__ || defined __osf__ + madvise(ptr, size, MADV_DONTNEED); /* OSF/1, Linux mimicing AIX disclaim() */ +# elif defined MADV_FREE_REUSABLE && defined MADV_FREE_REUSE + /* Mac OS X 10.9; undocumented in Apple macOS */ + madvise(ptr, size, MADV_FREE_REUSABLE); /* macOS mimicing AIX disclaim() */ +# elif defined MADV_PURGE /* Illumos */ + madvise(ptr, size, MADV_PURGE); /* Illumos mimicing AIX disclaim() */ +# elif defined MADV_FREE + /* FreeBSD, NetBSD, OpenBSD, Dragonfly BSD, OpenSolaris, Apple macOS */ + madvise(ptr, size, MADV_FREE); /* allow lazy zeroing out */ +# elif defined MADV_DONTNEED +# warning "It is unclear if madvise(MADV_DONTNEED) works as intended" + madvise(ptr, size, MADV_DONTNEED); +# else +# warning "Do not know how to decommit memory" +# endif + if (mprotect(ptr, size, prot)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno); + DBUG_ASSERT(0); + } +#endif + update_malloc_size(-(longlong) size, 0); +} + +void my_virtual_mem_release(char *ptr, size_t size) +{ +#ifdef _WIN32 + DBUG_ASSERT(my_use_large_pages || !is_memory_committed(ptr, size)); + if (!VirtualFree(ptr, 0, MEM_RELEASE)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, + GetLastError()); + DBUG_ASSERT(0); + } +#else + if (munmap(ptr, size)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno); + DBUG_ASSERT(0); + } +#endif +} diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc index e424141b151..61fa17596aa 100644 --- a/sql/mysql_install_db.cc +++ b/sql/mysql_install_db.cc @@ -336,7 +336,7 @@ static char *init_bootstrap_command_line(char *cmdline, size_t size) " --bootstrap" " --datadir=." " --tmpdir=." - " --loose-innodb-buffer-pool-size=20M" + " --loose-innodb-buffer-pool-size=21M" "\"" , mysqld_path, opt_verbose_bootstrap ? "--console" : ""); return cmdline; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 901e19fcb6a..51e0b4a3c8c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -420,7 +420,9 @@ my_bool opt_require_secure_transport= 0; char* opt_secure_file_priv; my_bool lower_case_file_system= 0; my_bool opt_large_pages= 0; +#ifdef HAVE_SOLARIS_LARGE_PAGES my_bool opt_super_large_pages= 0; +#endif my_bool opt_myisam_use_mmap= 0; uint opt_large_page_size= 0; #if defined(ENABLED_DEBUG_SYNC) @@ -4118,7 +4120,7 @@ static int init_common_variables() if (opt_large_pages) { DBUG_PRINT("info", ("Large page set")); - if (my_init_large_pages(opt_super_large_pages)) + if (my_init_large_pages()) { return 1; } @@ -7872,7 +7874,9 @@ static int mysql_init_variables(void) bzero((char*) &global_status_var, offsetof(STATUS_VAR, last_cleared_system_status_var)); opt_large_pages= 0; +#ifdef HAVE_SOLARIS_LARGE_PAGES opt_super_large_pages= 0; +#endif #if defined(ENABLED_DEBUG_SYNC) opt_debug_sync_timeout= 0; #endif /* defined(ENABLED_DEBUG_SYNC) */ diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 1d66560363b..75256526a32 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -195,7 +195,7 @@ static void btr_search_disable_ref_count(dict_table_t *table) } /** Lazily free detached metadata when removing the last reference. */ -ATTRIBUTE_COLD static void btr_search_lazy_free(dict_index_t *index) +ATTRIBUTE_COLD void btr_search_lazy_free(dict_index_t *index) { ut_ad(index->freed()); dict_table_t *table= index->table; @@ -219,8 +219,7 @@ ATTRIBUTE_COLD static void btr_search_lazy_free(dict_index_t *index) table->autoinc_mutex.wr_unlock(); } -/** Disable the adaptive hash search system and empty the index. */ -void btr_search_disable() +ATTRIBUTE_COLD bool btr_search_disable() { dict_table_t* table; @@ -231,7 +230,7 @@ void btr_search_disable() if (!btr_search_enabled) { dict_sys.unfreeze(); btr_search_x_unlock_all(); - return; + return false; } btr_search_enabled = false; @@ -259,23 +258,25 @@ void btr_search_disable() btr_search_sys.clear(); btr_search_x_unlock_all(); + + return true; } /** Enable the adaptive hash search system. @param resize whether buf_pool_t::resize() is the caller */ -void btr_search_enable(bool resize) +ATTRIBUTE_COLD void btr_search_enable(bool resize) { if (!resize) { mysql_mutex_lock(&buf_pool.mutex); - bool changed = srv_buf_pool_old_size != srv_buf_pool_size; + const auto is_shrinking = buf_pool.is_shrinking(); mysql_mutex_unlock(&buf_pool.mutex); - if (changed) { + if (is_shrinking) { return; } } btr_search_x_lock_all(); - ulint hash_size = buf_pool_get_curr_size() / sizeof(void *) / 64; + ulint hash_size = buf_pool.curr_size() / sizeof(void *) / 64; if (btr_search_sys.parts[0].heap) { ut_ad(btr_search_enabled); @@ -939,88 +940,6 @@ btr_search_failure(btr_search_t* info, btr_cur_t* cursor) info->last_hash_succ = FALSE; } -/** Clear the adaptive hash index on all pages in the buffer pool. */ -inline void buf_pool_t::clear_hash_index() noexcept -{ - ut_ad(!resizing); - ut_ad(!btr_search_enabled); - - std::set garbage; - - for (chunk_t *chunk= chunks + n_chunks; chunk-- != chunks; ) - { - for (buf_block_t *block= chunk->blocks, * const end= block + chunk->size; - block != end; block++) - { - dict_index_t *index= block->index; - assert_block_ahi_valid(block); - - /* We can clear block->index and block->n_pointers when - holding all AHI latches exclusively; see the comments in buf0buf.h */ - - if (!index) - { -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(!block->n_pointers); -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - continue; - } - - ut_d(const auto s= block->page.state()); - /* Another thread may have set the state to - REMOVE_HASH in buf_LRU_block_remove_hashed(). - - The state change in buf_pool_t::realloc() is not observable - here, because in that case we would have !block->index. - - In the end, the entire adaptive hash index will be removed. */ - ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - block->n_pointers= 0; -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - if (index->freed()) - garbage.insert(index); - block->index= nullptr; - } - } - - for (dict_index_t *index : garbage) - btr_search_lazy_free(index); -} - -/** Get a buffer block from an adaptive hash index pointer. -This function does not return if the block is not identified. -@param ptr pointer to within a page frame -@return pointer to block, never NULL */ -inline buf_block_t* buf_pool_t::block_from_ahi(const byte *ptr) const noexcept -{ - chunk_t::map *chunk_map = chunk_t::map_ref; - ut_ad(chunk_t::map_ref == chunk_t::map_reg); - ut_ad(!resizing); - - chunk_t::map::const_iterator it= chunk_map->upper_bound(ptr); - ut_a(it != chunk_map->begin()); - - chunk_t *chunk= it == chunk_map->end() - ? chunk_map->rbegin()->second - : (--it)->second; - - const size_t offs= size_t(ptr - chunk->blocks->page.frame) >> - srv_page_size_shift; - ut_a(offs < chunk->size); - - buf_block_t *block= &chunk->blocks[offs]; - /* buf_pool_t::chunk_t::init() invokes buf_block_init() so that - block[n].frame == block->page.frame + n * srv_page_size. Check it. */ - ut_ad(block->page.frame == page_align(ptr)); - /* Read the state of the block without holding hash_lock. - A state transition to REMOVE_HASH is possible during - this execution. */ - ut_ad(block->page.state() >= buf_page_t::REMOVE_HASH); - - return block; -} - /** Tries to guess the right search position based on the hash search info of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, and the function returns TRUE, then cursor->up_match and cursor->low_match @@ -1103,7 +1022,8 @@ fail: return false; } - buf_block_t* block = buf_pool.block_from_ahi(rec); + buf_block_t* block = buf_pool.block_from(rec); + ut_ad(block->page.frame == page_align(rec)); buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get( block->page.id().fold()); @@ -2196,7 +2116,7 @@ func_exit: for (; node != NULL; node = node->next) { const buf_block_t* block - = buf_pool.block_from_ahi((byte*) node->data); + = buf_pool.block_from(node->data); index_id_t page_index_id; if (UNIV_LIKELY(block->page.in_file())) { diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index 2c6b652d18f..12620ec8a2a 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -162,6 +162,20 @@ buf_buddy_get( } #ifdef UNIV_DEBUG +const buf_block_t *buf_pool_t::contains_zip(const void *data, size_t shift) + const noexcept +{ + const size_t d= size_t(data) >> shift; + + for (size_t i= 0; i < n_blocks; i++) + { + const buf_block_t *block= get_nth_page(i); + if (size_t(block->page.zip.data) >> shift == d) + return block; + } + return nullptr; +} + /** Validate a given zip_free list. */ struct CheckZipFree { CheckZipFree(ulint i) : m_i(i) {} @@ -257,13 +271,10 @@ buf_buddy_is_free( /** Add a block to the head of the appropriate buddy free list. @param[in,out] buf block to be freed @param[in] i index of buf_pool.zip_free[] */ -UNIV_INLINE -void -buf_buddy_add_to_free(buf_buddy_free_t* buf, ulint i) +static void buf_buddy_add_to_free(buf_buddy_free_t *buf, ulint i) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_pool.zip_free[i].start != buf); - buf_buddy_stamp_free(buf, i); UT_LIST_ADD_FIRST(buf_pool.zip_free[i], buf); ut_d(buf_buddy_list_validate(i)); @@ -272,9 +283,7 @@ buf_buddy_add_to_free(buf_buddy_free_t* buf, ulint i) /** Remove a block from the appropriate buddy free list. @param[in,out] buf block to be freed @param[in] i index of buf_pool.zip_free[] */ -UNIV_INLINE -void -buf_buddy_remove_from_free(buf_buddy_free_t* buf, ulint i) +static void buf_buddy_remove_from_free(buf_buddy_free_t *buf, ulint i) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_buddy_check_free(buf, i)); @@ -298,13 +307,10 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]); - if (buf_pool.is_shrinking() - && UT_LIST_GET_LEN(buf_pool.withdraw) - < buf_pool.withdraw_target) { - + if (size_t size = buf_pool.shrinking_size()) { while (buf != NULL && buf_pool.will_be_withdrawn( - reinterpret_cast(buf))) { + reinterpret_cast(buf), size)) { /* This should be withdrawn, not to be allocated */ buf = UT_LIST_GET_NEXT(list, buf); } @@ -312,6 +318,7 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) if (buf) { buf_buddy_remove_from_free(buf, i); + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); } else if (i + 1 < BUF_BUDDY_SIZES) { /* Attempt to split. */ buf = buf_buddy_alloc_zip(i + 1); @@ -321,7 +328,6 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) reinterpret_cast( reinterpret_cast(buf) + (BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool.contains_zip(buddy)); buf_buddy_add_to_free(buddy, i); } } @@ -340,74 +346,52 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) return(buf); } +#ifdef UNIV_DEBUG +/** number of blocks allocated to the buddy system */ +static size_t buf_buddy_n_frames; +#endif + /** Deallocate a buffer frame of srv_page_size. @param buf buffer frame to deallocate */ static void buf_buddy_block_free(void *buf) noexcept { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_a(!ut_align_offset(buf, srv_page_size)); - - const ulint fold= BUF_POOL_ZIP_FOLD_PTR(buf); - buf_page_t **prev= buf_pool.zip_hash.cell_get(fold)-> - search(&buf_page_t::hash, [buf](const buf_page_t *b) - { - ut_ad(b->in_zip_hash); - ut_ad(b->state() == buf_page_t::MEMORY); - return b->frame == buf; - }); - - buf_page_t *bpage= *prev; - ut_a(bpage); - ut_a(bpage->frame == buf); - ut_d(bpage->in_zip_hash= false); - *prev= bpage->hash; - bpage->hash= nullptr; - + buf_block_t *block= buf_pool.block_from(buf); + ut_ad(block->page.state() == buf_page_t::MEMORY); + ut_ad(block->page.frame == buf); + ut_ad(!buf_pool.contains_zip(buf, srv_page_size_shift)); ut_d(memset(buf, 0, srv_page_size)); MEM_UNDEFINED(buf, srv_page_size); - - buf_LRU_block_free_non_file_page(reinterpret_cast(bpage)); - ut_ad(buf_pool.buddy_n_frames > 0); - ut_d(buf_pool.buddy_n_frames--); + buf_LRU_block_free_non_file_page(block); + ut_ad(buf_buddy_n_frames > 0); + ut_d(buf_buddy_n_frames--); } /** Allocate a buffer block to the buddy allocator. @param block buffer block to register */ static void buf_buddy_block_register(buf_block_t *block) noexcept { - const ulint fold= BUF_POOL_ZIP_FOLD(block); + ut_ad(buf_pool.is_uncompressed_current(block)); ut_ad(block->page.state() == buf_page_t::MEMORY); - - ut_a(block->page.frame); - ut_a(!ut_align_offset(block->page.frame, srv_page_size)); - - ut_ad(!block->page.in_zip_hash); - ut_d(block->page.in_zip_hash= true); - buf_pool.zip_hash.cell_get(fold)->append(block->page, &buf_page_t::hash); - ut_d(buf_pool.buddy_n_frames++); + ut_d(buf_buddy_n_frames++); } /** Allocate a block from a bigger object. @param[in] buf a block that is free to use @param[in] i index of buf_pool.zip_free[] -@param[in] j size of buf as an index of buf_pool.zip_free[] @return allocated block */ -static -void* -buf_buddy_alloc_from(void* buf, ulint i, ulint j) +static void *buf_buddy_alloc_from(void *buf, ulint i) { - ulint offs = BUF_BUDDY_LOW << j; - ut_ad(j <= BUF_BUDDY_SIZES); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - ut_ad(j >= i); - ut_ad(!ut_align_offset(buf, offs)); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(!ut_align_offset(buf, srv_page_size)); + ut_ad(!buf_pool.contains_zip(buf, srv_page_size)); /* Add the unused parts of the block to the free lists. */ - while (j > i) { + for (ulint j = BUF_BUDDY_SIZES, offs = srv_page_size; j-- > i; ) { buf_buddy_free_t* zip_buf; offs >>= 1; - j--; zip_buf = reinterpret_cast( reinterpret_cast(buf) + offs); @@ -422,7 +406,7 @@ buf_buddy_alloc_from(void* buf, ulint i, ulint j) @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -byte *buf_buddy_alloc_low(ulint i, bool *lru) +byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept { buf_block_t* block; @@ -439,7 +423,7 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru) } /* Try allocating from the buf_pool.free list. */ - block = buf_LRU_get_free_only(); + block = buf_pool.allocate(); if (block) { goto alloc_big; @@ -455,21 +439,21 @@ alloc_big: buf_buddy_block_register(block); block = reinterpret_cast( - buf_buddy_alloc_from(block->page.frame, i, BUF_BUDDY_SIZES)); + buf_buddy_alloc_from(block->page.frame, i)); func_exit: buf_pool.buddy_stat[i].used++; return reinterpret_cast(block); } -/** Try to relocate a block. The caller must hold zip_free_mutex, and this -function will release and lock it again. +/** Try to relocate a block. @param[in] src block to relocate @param[in] dst free block to relocated to @param[in] i index of buf_pool.zip_free[] @param[in] force true if we must relocated always @return true if relocated */ -static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) +static bool buf_buddy_relocate(void *src, void *dst, ulint i, bool force) + noexcept { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; @@ -575,7 +559,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -void buf_buddy_free_low(void* buf, ulint i) +void buf_buddy_free_low(void* buf, ulint i) noexcept { buf_buddy_free_t* buddy; @@ -595,13 +579,12 @@ recombine: ut_ad(i < BUF_BUDDY_SIZES); ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool.contains_zip(buf)); + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); /* Do not recombine blocks if there are few free blocks. We may waste up to 15360*max_len bytes to free blocks (1024 + 2048 + 4096 + 8192 = 15360) */ - if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16 - && !buf_pool.is_shrinking()) { + if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16) { goto func_exit; } @@ -615,10 +598,9 @@ recombine: /* The buddy is free: recombine */ buf_buddy_remove_from_free(buddy, i); buddy_is_free: - ut_ad(!buf_pool.contains_zip(buddy)); i++; buf = ut_align_down(buf, BUF_BUDDY_LOW << i); - + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); goto recombine; case BUF_BUDDY_STATE_USED: @@ -655,107 +637,125 @@ func_exit: buf_buddy_add_to_free(reinterpret_cast(buf), i); } -/** Try to reallocate a block. -@param[in] buf buf_pool block to be reallocated -@param[in] size block size, up to srv_page_size -@return whether the reallocation succeeded */ -bool -buf_buddy_realloc(void* buf, ulint size) +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::resize(). +@param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page +@param block uncompressed block for storage +@return block +@retval nullptr if the block was consumed */ +ATTRIBUTE_COLD +buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept { - buf_block_t* block = NULL; - ulint i = buf_buddy_get_slot(size); + ut_ad(bpage->zip.data); - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(i <= BUF_BUDDY_SIZES); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); + void *dst= nullptr; + ulint size= page_zip_get_size(&bpage->zip); + ulint i= buf_buddy_get_slot(size); - if (i < BUF_BUDDY_SIZES) { - /* Try to allocate from the buddy system. */ - block = reinterpret_cast(buf_buddy_alloc_zip(i)); - } + ut_ad(buf_pool.will_be_withdrawn(bpage->zip.data, size)); + ut_ad(bpage->can_relocate()); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - if (block == NULL) { - /* Try allocating from the buf_pool.free list. */ - block = buf_LRU_get_free_only(); + if (UNIV_LIKELY(i < BUF_BUDDY_SIZES)) + dst= buf_buddy_alloc_zip(i); - if (block == NULL) { - return(false); /* free_list was not enough */ - } + if (!dst) + { + buf_buddy_block_register(block); + dst= buf_buddy_alloc_from(block->page.frame, i); + ut_ad(dst); + block= nullptr; + } - buf_buddy_block_register(block); + void *src= bpage->zip.data; + memcpy_aligned(dst, src, size); + bpage->zip.data= static_cast(dst); + buf_pool.buddy_stat[i].relocated++; - block = reinterpret_cast( - buf_buddy_alloc_from( - block->page.frame, i, BUF_BUDDY_SIZES)); - } + for (;;) + { + MEM_UNDEFINED(src, BUF_BUDDY_LOW << i); + ut_ad(i < BUF_BUDDY_SIZES); + /* Try to combine adjacent blocks. */ + buf_buddy_free_t *buddy= reinterpret_cast + (buf_buddy_get(static_cast(src), BUF_BUDDY_LOW << i)); - buf_pool.buddy_stat[i].used++; + if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE) + { + ut_ad(!buf_pool.contains_zip(src, BUF_BUDDY_LOW_SHIFT + i)); + buf_buddy_add_to_free(static_cast(src), i); + break; + } - /* Try to relocate the buddy of buf to the free block. */ - if (buf_buddy_relocate(buf, block, i, true)) { - /* succeeded */ - buf_buddy_free_low(buf, i); - } else { - /* failed */ - buf_buddy_free_low(block, i); - } + /* The buddy is free: recombine */ + buf_buddy_remove_from_free(buddy, i); + i++; + src= ut_align_down(src, BUF_BUDDY_LOW << i); + if (i == BUF_BUDDY_SIZES) + { + buf_buddy_block_free(src); + break; + } + } - return(true); /* free_list was enough */ + return block; } -/** Combine all pairs of free buddies. */ -void buf_buddy_condense_free() +/** Combine all pairs of free buddies. +@param size the target innodb_buffer_pool_size */ +ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept { - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(buf_pool.is_shrinking()); + ut_ad(size); + ut_ad(size == buf_pool.shrinking_size()); - for (ulint i = 0; i < UT_ARR_SIZE(buf_pool.zip_free); ++i) { - buf_buddy_free_t* buf = - UT_LIST_GET_FIRST(buf_pool.zip_free[i]); + for (ulint i= 0; i < array_elements(buf_pool.zip_free); i++) + { + buf_buddy_free_t *buf= UT_LIST_GET_FIRST(buf_pool.zip_free[i]); - /* seek to withdraw target */ - while (buf != NULL - && !buf_pool.will_be_withdrawn( - reinterpret_cast(buf))) { - buf = UT_LIST_GET_NEXT(list, buf); - } + /* seek to withdraw target */ + while (buf && + !buf_pool.will_be_withdrawn(reinterpret_cast(buf), size)) + buf= UT_LIST_GET_NEXT(list, buf); - while (buf != NULL) { - buf_buddy_free_t* next = - UT_LIST_GET_NEXT(list, buf); + for (buf_buddy_free_t *next= buf; buf; buf= next) + { + buf_buddy_free_t *buddy= reinterpret_cast + (buf_buddy_get(reinterpret_cast(buf), BUF_BUDDY_LOW << i)); - buf_buddy_free_t* buddy = - reinterpret_cast( - buf_buddy_get( - reinterpret_cast(buf), - BUF_BUDDY_LOW << i)); + /* seek to the next withdraw target */ + do + { + while ((next= UT_LIST_GET_NEXT(list, next)) && + !buf_pool.will_be_withdrawn(reinterpret_cast(next), + size)) {} + } + while (buddy == next); - /* seek to the next withdraw target */ - while (true) { - while (next != NULL - && !buf_pool.will_be_withdrawn( - reinterpret_cast(next))) { - next = UT_LIST_GET_NEXT(list, next); - } + if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE) + continue; - if (buddy != next) { - break; - } + buf_buddy_remove_from_free(buf, i); + ulint j= i; + recombine: + buf_buddy_remove_from_free(buddy, j); + j++; + buf= static_cast + (ut_align_down(buf, BUF_BUDDY_LOW << j)); + MEM_UNDEFINED(buf, BUF_BUDDY_LOW << j); - next = UT_LIST_GET_NEXT(list, next); - } + if (j == BUF_BUDDY_SIZES) + { + buf_buddy_block_free(buf); + continue; + } - if (buf_buddy_is_free(buddy, i) - == BUF_BUDDY_STATE_FREE) { - /* Both buf and buddy are free. - Try to combine them. */ - buf_buddy_remove_from_free(buf, i); - buf_pool.buddy_stat[i].used++; + buddy= reinterpret_cast + (buf_buddy_get(reinterpret_cast(buf), BUF_BUDDY_LOW << j)); + if (buf_buddy_is_free(buddy, j) == BUF_BUDDY_STATE_FREE) + goto recombine; - buf_buddy_free_low(buf, i); - } - - buf = next; - } - } + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + j)); + buf_buddy_add_to_free(buf, j); + } + } } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 20222631434..27c7f2f584e 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -47,8 +47,6 @@ Created 11/5/1995 Heikki Tuuri #include "lock0lock.h" #include "btr0sea.h" #include "ibuf0ibuf.h" -#include "trx0undo.h" -#include "trx0purge.h" #include "log0log.h" #include "dict0stats_bg.h" #include "srv0srv.h" @@ -64,6 +62,7 @@ Created 11/5/1995 Heikki Tuuri #include #include #include "log.h" +#include "my_virtual_mem.h" using st_::span; @@ -277,6 +276,56 @@ the read requests for the whole area. */ #ifndef UNIV_INNOCHECKSUM +/** Compute the number of page frames needed for buf_block_t, +per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr uint8_t first_page(size_t ps) +{ + return uint8_t(innodb_buffer_pool_extent_size / ps - + innodb_buffer_pool_extent_size / (ps + sizeof(buf_block_t))); +} + +/** Compute the number of bytes needed for buf_block_t, +per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr size_t first_frame(size_t ps) +{ + return first_page(ps) * ps; +} + +/** Compute the number of pages per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr uint16_t pages(size_t ps) +{ + return uint16_t(innodb_buffer_pool_extent_size / ps - first_page(ps)); +} + +/** The byte offset of the first page frame in a buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr size_t first_frame_in_extent[]= +{ + first_frame(4096), first_frame(8192), first_frame(16384), + first_frame(32768), first_frame(65536) +}; + +/** The position offset of the first page frame in a buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr uint8_t first_page_in_extent[]= +{ + first_page(4096), first_page(8192), first_page(16384), + first_page(32768), first_page(65536) +}; + +/** Number of pages per buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr size_t pages_in_extent[]= +{ + pages(4096), pages(8192), pages(16384), pages(32768), pages(65536) +}; + # ifdef SUX_LOCK_GENERIC void page_hash_latch::read_lock_wait() noexcept { @@ -326,8 +375,6 @@ const byte *field_ref_zero; /** The InnoDB buffer pool */ buf_pool_t buf_pool; -buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_reg; -buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_ref; #ifdef UNIV_DEBUG /** This is used to insert validation operations in execution @@ -797,6 +844,11 @@ public: bool setup() { + m_num_fds= 0; + + if (my_use_large_pages) + return false; + static_assert(array_elements(m_fds) == (array_elements(m_triggers) + 1), "insufficient fds"); std::string memcgroup{"/sys/fs/cgroup"}; @@ -809,7 +861,6 @@ public: cgroup.erase(0, 3); // Remove "0::" memcgroup+= cgroup + "/memory.pressure"; - m_num_fds= 0; for (auto trig= std::begin(m_triggers); trig!= std::end(m_triggers); ++trig) { if ((m_fds[m_num_fds].fd= @@ -958,1163 +1009,17 @@ void mem_pressure::pressure_routine(mem_pressure *m) } /** Initialize mem pressure. */ -ATTRIBUTE_COLD void buf_mem_pressure_detect_init() +ATTRIBUTE_COLD static void buf_mem_pressure_detect_init() noexcept { mem_pressure_obj.setup(); } -ATTRIBUTE_COLD void buf_mem_pressure_shutdown() +ATTRIBUTE_COLD void buf_mem_pressure_shutdown() noexcept { mem_pressure_obj.join(); } -#endif /* __linux__ */ -#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) -/** Enable buffers to be dumped to core files - -A convience function, not called anyhwere directly however -it is left available for gdb or any debugger to call -in the event that you want all of the memory to be dumped -to a core file. - -Returns number of errors found in madvise calls. */ -MY_ATTRIBUTE((used)) -int -buf_madvise_do_dump() -{ - int ret= 0; - - /* mirrors allocation in log_t::create() */ - if (log_sys.buf) { - ret += madvise(log_sys.buf, log_sys.buf_size, MADV_DODUMP); - ret += madvise(log_sys.flush_buf, log_sys.buf_size, - MADV_DODUMP); - } - - mysql_mutex_lock(&buf_pool.mutex); - auto chunk = buf_pool.chunks; - - for (ulint n = buf_pool.n_chunks; n--; chunk++) { - ret+= madvise(chunk->mem, chunk->mem_size(), MADV_DODUMP); - } - - mysql_mutex_unlock(&buf_pool.mutex); - return ret; -} -#endif - -#ifndef UNIV_DEBUG -static inline byte hex_to_ascii(byte hex_digit) -{ - const int offset= hex_digit <= 9 ? '0' : 'a' - 10; - return byte(hex_digit + offset); -} -#endif - -/** Dump a page to stderr. -@param[in] read_buf database page -@param[in] zip_size compressed page size, or 0 */ -ATTRIBUTE_COLD -void buf_page_print(const byte *read_buf, ulint zip_size) noexcept -{ -#ifndef UNIV_DEBUG - const size_t size = zip_size ? zip_size : srv_page_size; - const byte * const end= read_buf + size; - sql_print_information("InnoDB: Page dump (%zu bytes):", size); - - do - { - byte row[64]; - - for (byte *r= row; r != &row[64]; r+= 2, read_buf++) - { - r[0]= hex_to_ascii(byte(*read_buf >> 4)); - r[1]= hex_to_ascii(*read_buf & 15); - } - - sql_print_information("InnoDB: %.*s", 64, row); - } - while (read_buf != end); - - sql_print_information("InnoDB: End of page dump"); -#endif -} - -/** Initialize a buffer page descriptor. -@param[in,out] block buffer page descriptor -@param[in] frame buffer page frame */ -static -void -buf_block_init(buf_block_t* block, byte* frame) -{ - /* This function should only be executed at database startup or by - buf_pool.resize(). Either way, adaptive hash index must not exist. */ - assert_block_ahi_empty_on_init(block); - - block->page.frame = frame; - - MEM_MAKE_DEFINED(&block->modify_clock, sizeof block->modify_clock); - ut_ad(!block->modify_clock); - MEM_MAKE_DEFINED(&block->page.lock, sizeof block->page.lock); - block->page.lock.init(); - block->page.init(buf_page_t::NOT_USED, page_id_t(~0ULL)); -#ifdef BTR_CUR_HASH_ADAPT - MEM_MAKE_DEFINED(&block->index, sizeof block->index); - ut_ad(!block->index); -#endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->in_unzip_LRU_list = false); - ut_d(block->in_withdraw_list = false); - - page_zip_des_init(&block->page.zip); - - MEM_MAKE_DEFINED(&block->page.hash, sizeof block->page.hash); - ut_ad(!block->page.hash); -} - -/** Allocate a chunk of buffer frames. -@param bytes requested size -@return whether the allocation succeeded */ -inline bool buf_pool_t::chunk_t::create(size_t bytes) noexcept -{ - DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return false;); - /* Round down to a multiple of page size, although it already should be. */ - bytes= ut_2pow_round(bytes, srv_page_size); - - mem= buf_pool.allocator.allocate_large_dontdump(bytes, &mem_pfx); - - if (UNIV_UNLIKELY(!mem)) - return false; - - MEM_UNDEFINED(mem, mem_size()); - -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) - { - struct bitmask *numa_mems_allowed= numa_get_mems_allowed(); - MEM_MAKE_DEFINED(numa_mems_allowed, sizeof *numa_mems_allowed); - if (mbind(mem, mem_size(), MPOL_INTERLEAVE, - numa_mems_allowed->maskp, numa_mems_allowed->size, - MPOL_MF_MOVE)) - { - ib::warn() << "Failed to set NUMA memory policy of" - " buffer pool page frames to MPOL_INTERLEAVE" - " (error: " << strerror(errno) << ")."; - } - numa_bitmask_free(numa_mems_allowed); - } -#endif /* HAVE_LIBNUMA */ - - - /* Allocate the block descriptors from - the start of the memory block. */ - blocks= reinterpret_cast(mem); - - /* Align a pointer to the first frame. Note that when - opt_large_page_size is smaller than srv_page_size, - (with max srv_page_size at 64k don't think any hardware - makes this true), - we may allocate one fewer block than requested. When - it is bigger, we may allocate more blocks than requested. */ - static_assert(sizeof(byte*) == sizeof(ulint), "pointer size"); - - byte *frame= reinterpret_cast((reinterpret_cast(mem) + - srv_page_size - 1) & - ~ulint{srv_page_size - 1}); - size= (mem_pfx.m_size >> srv_page_size_shift) - (frame != mem); - - /* Subtract the space needed for block descriptors. */ - { - ulint s= size; - - while (frame < reinterpret_cast(blocks + s)) - { - frame+= srv_page_size; - s--; - } - - size= s; - } - - /* Init block structs and assign frames for them. Then we assign the - frames to the first blocks (we already mapped the memory above). */ - - buf_block_t *block= blocks; - - for (auto i= size; i--; ) { - buf_block_init(block, frame); - MEM_UNDEFINED(block->page.frame, srv_page_size); - /* Add the block to the free list */ - UT_LIST_ADD_LAST(buf_pool.free, &block->page); - - ut_d(block->page.in_free_list = TRUE); - block++; - frame+= srv_page_size; - } - - reg(); - - return true; -} - -#ifdef UNIV_DEBUG -/** Check that all file pages in the buffer chunk are in a replaceable state. -@return address of a non-free block -@retval nullptr if all freed */ -inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const noexcept -{ - buf_block_t *block= blocks; - for (auto i= size; i--; block++) - { - if (block->page.in_file()) - { - /* The uncompressed buffer pool should never - contain ROW_FORMAT=COMPRESSED block descriptors. */ - ut_ad(block->page.frame); - const lsn_t lsn= block->page.oldest_modification(); - - if (srv_read_only_mode) - { - /* The page cleaner is disabled in read-only mode. No pages - can be dirtied, so all of them must be clean. */ - ut_ad(lsn == 0 || lsn == recv_sys.lsn || - srv_force_recovery == SRV_FORCE_NO_LOG_REDO); - break; - } - - if (fsp_is_system_temporary(block->page.id().space())) - { - ut_ad(lsn == 0 || lsn == 2); - break; - } - - if (lsn > 1 || !block->page.can_relocate()) - return block; - - break; - } - } - - return nullptr; -} -#endif /* UNIV_DEBUG */ - -/** Create the hash table. -@param n the lower bound of n_cells */ -void buf_pool_t::page_hash_table::create(ulint n) noexcept -{ - n_cells= ut_find_prime(n); - const size_t size= MY_ALIGN(pad(n_cells) * sizeof *array, - CPU_LEVEL1_DCACHE_LINESIZE); - void *v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); - memset_aligned(v, 0, size); - array= static_cast(v); -} - -/** Create the buffer pool. -@return whether the creation failed */ -bool buf_pool_t::create() -{ - ut_ad(this == &buf_pool); - ut_ad(srv_buf_pool_size % srv_buf_pool_chunk_unit == 0); - ut_ad(!is_initialised()); - ut_ad(srv_buf_pool_size > 0); - ut_ad(!resizing); - ut_ad(!chunks_old); - /* mariabackup loads tablespaces, and it requires field_ref_zero to be - allocated before innodb initialization */ - ut_ad(srv_operation >= SRV_OPERATION_RESTORE || !field_ref_zero); - - NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - - if (!field_ref_zero) { - if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) - field_ref_zero= static_cast - (memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX)); - else - return true; - } - - chunk_t::map_reg= UT_NEW_NOKEY(chunk_t::map()); - - new(&allocator) ut_allocator(mem_key_buf_buf_pool); - - n_chunks= srv_buf_pool_size / srv_buf_pool_chunk_unit; - const size_t chunk_size= srv_buf_pool_chunk_unit; - - chunks= static_cast(ut_zalloc_nokey(n_chunks * sizeof *chunks)); - UT_LIST_INIT(free, &buf_page_t::list); - curr_size= 0; - auto chunk= chunks; - - do - { - if (!chunk->create(chunk_size)) - { - while (--chunk >= chunks) - { - buf_block_t* block= chunk->blocks; - - for (auto i= chunk->size; i--; block++) - block->page.lock.free(); - - allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); - } - ut_free(chunks); - chunks= nullptr; - UT_DELETE(chunk_t::map_reg); - chunk_t::map_reg= nullptr; - aligned_free(const_cast(field_ref_zero)); - field_ref_zero= nullptr; - ut_ad(!is_initialised()); - return true; - } - - curr_size+= chunk->size; - } - while (++chunk < chunks + n_chunks); - - ut_ad(is_initialised()); -#if defined(__aarch64__) - mysql_mutex_init(buf_pool_mutex_key, &mutex, MY_MUTEX_INIT_FAST); -#else - mysql_mutex_init(buf_pool_mutex_key, &mutex, nullptr); -#endif - - UT_LIST_INIT(LRU, &buf_page_t::LRU); - UT_LIST_INIT(withdraw, &buf_page_t::list); - withdraw_target= 0; - UT_LIST_INIT(flush_list, &buf_page_t::list); - UT_LIST_INIT(unzip_LRU, &buf_block_t::unzip_LRU); - - for (size_t i= 0; i < UT_ARR_SIZE(zip_free); ++i) - UT_LIST_INIT(zip_free[i], &buf_buddy_free_t::list); - ulint s= curr_size; - s/= BUF_READ_AHEAD_PORTION; - read_ahead_area= s >= READ_AHEAD_PAGES - ? READ_AHEAD_PAGES - : my_round_up_to_next_power(static_cast(s)); - curr_pool_size= srv_buf_pool_size; - - n_chunks_new= n_chunks; - - page_hash.create(2 * curr_size); - zip_hash.create(2 * curr_size); - last_printout_time= time(NULL); - - mysql_mutex_init(flush_list_mutex_key, &flush_list_mutex, - MY_MUTEX_INIT_FAST); - - pthread_cond_init(&done_flush_LRU, nullptr); - pthread_cond_init(&done_flush_list, nullptr); - pthread_cond_init(&do_flush_list, nullptr); - pthread_cond_init(&done_free, nullptr); - - try_LRU_scan= true; - - ut_d(flush_hp.m_mutex= &flush_list_mutex;); - ut_d(lru_hp.m_mutex= &mutex); - ut_d(lru_scan_itr.m_mutex= &mutex); - - io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) * - OS_AIO_N_PENDING_IOS_PER_THREAD); - - /* FIXME: remove some of these variables */ - srv_buf_pool_curr_size= curr_pool_size; - srv_buf_pool_old_size= srv_buf_pool_size; - srv_buf_pool_base_size= srv_buf_pool_size; - - last_activity_count= srv_get_activity_count(); - - chunk_t::map_ref= chunk_t::map_reg; - buf_LRU_old_ratio_update(100 * 3 / 8, false); - btr_search_sys_create(); - -#ifdef __linux__ - if (srv_operation == SRV_OPERATION_NORMAL) - buf_mem_pressure_detect_init(); -#endif - ut_ad(is_initialised()); - return false; -} - -/** Clean up after successful create() */ -void buf_pool_t::close() noexcept -{ - ut_ad(this == &buf_pool); - if (!is_initialised()) - return; - - mysql_mutex_destroy(&mutex); - mysql_mutex_destroy(&flush_list_mutex); - - for (buf_page_t *bpage= UT_LIST_GET_LAST(LRU), *prev_bpage= nullptr; bpage; - bpage= prev_bpage) - { - prev_bpage= UT_LIST_GET_PREV(LRU, bpage); - ut_ad(bpage->in_file()); - ut_ad(bpage->in_LRU_list); - /* The buffer pool must be clean during normal shutdown. - Only on aborted startup (with recovery) or with innodb_fast_shutdown=2 - we may discard changes. */ - ut_d(const lsn_t oldest= bpage->oldest_modification();) - ut_ad(fsp_is_system_temporary(bpage->id().space()) - ? (oldest == 0 || oldest == 2) - : oldest <= 1 || srv_is_being_started || srv_fast_shutdown == 2); - - if (UNIV_UNLIKELY(!bpage->frame)) - { - bpage->lock.free(); - ut_free(bpage); - } - } - - for (auto chunk= chunks + n_chunks; --chunk >= chunks; ) - { - buf_block_t *block= chunk->blocks; - - for (auto i= chunk->size; i--; block++) - block->page.lock.free(); - - allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); - } - - pthread_cond_destroy(&done_flush_LRU); - pthread_cond_destroy(&done_flush_list); - pthread_cond_destroy(&do_flush_list); - pthread_cond_destroy(&done_free); - - ut_free(chunks); - chunks= nullptr; - page_hash.free(); - zip_hash.free(); - - io_buf.close(); - UT_DELETE(chunk_t::map_reg); - chunk_t::map_reg= chunk_t::map_ref= nullptr; - aligned_free(const_cast(field_ref_zero)); - field_ref_zero= nullptr; -} - -/** Try to reallocate a control block. -@param block control block to reallocate -@return whether the reallocation succeeded */ -inline bool buf_pool_t::realloc(buf_block_t *block) noexcept -{ - buf_block_t* new_block; - - mysql_mutex_assert_owner(&mutex); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - new_block = buf_LRU_get_free_only(); - - if (new_block == NULL) { - mysql_mutex_lock(&buf_pool.flush_list_mutex); - page_cleaner_wakeup(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - return(false); /* free list was not enough */ - } - - const page_id_t id{block->page.id()}; - hash_chain& chain = page_hash.cell_get(id.fold()); - page_hash_latch& hash_lock = page_hash.lock_get(chain); - /* It does not make sense to use transactional_lock_guard - here, because copying innodb_page_size (4096 to 65536) bytes - as well as other changes would likely make the memory - transaction too large. */ - hash_lock.lock(); - - if (block->page.can_relocate()) { - memcpy_aligned( - new_block->page.frame, block->page.frame, - srv_page_size); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - const auto frame = new_block->page.frame; - new_block->page.lock.free(); - new (&new_block->page) buf_page_t(block->page); - new_block->page.frame = frame; - - /* relocate LRU list */ - if (buf_page_t* prev_b = buf_pool.LRU_remove(&block->page)) { - UT_LIST_INSERT_AFTER(LRU, prev_b, &new_block->page); - } else { - UT_LIST_ADD_FIRST(LRU, &new_block->page); - } - - if (LRU_old == &block->page) { - LRU_old = &new_block->page; - } - - ut_ad(new_block->page.in_LRU_list); - - /* relocate unzip_LRU list */ - if (block->page.zip.data != NULL) { - ut_ad(block->in_unzip_LRU_list); - ut_d(new_block->in_unzip_LRU_list = true); - - buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block); - UT_LIST_REMOVE(unzip_LRU, block); - - ut_d(block->in_unzip_LRU_list = false); - block->page.zip.data = NULL; - page_zip_set_size(&block->page.zip, 0); - - if (prev_block != NULL) { - UT_LIST_INSERT_AFTER(unzip_LRU, prev_block, new_block); - } else { - UT_LIST_ADD_FIRST(unzip_LRU, new_block); - } - } else { - ut_ad(!block->in_unzip_LRU_list); - ut_d(new_block->in_unzip_LRU_list = false); - } - - /* relocate page_hash */ - hash_chain& chain = page_hash.cell_get(id.fold()); - ut_ad(&block->page == page_hash.get(id, chain)); - buf_pool.page_hash.replace(chain, &block->page, - &new_block->page); - buf_block_modify_clock_inc(block); - static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - memset_aligned<4>(block->page.frame - + FIL_PAGE_OFFSET, 0xff, 4); - static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, - "not perfect alignment"); - memset_aligned<2>(block->page.frame - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); - MEM_UNDEFINED(block->page.frame, srv_page_size); - block->page.set_state(buf_page_t::REMOVE_HASH); - if (!fsp_is_system_temporary(id.space())) { - buf_flush_relocate_on_flush_list(&block->page, - &new_block->page); - } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - block->page.set_corrupt_id(); - - /* set other flags of buf_block_t */ - -#ifdef BTR_CUR_HASH_ADAPT - /* This code should only be executed by resize(), - while the adaptive hash index is disabled. */ - assert_block_ahi_empty(block); - assert_block_ahi_empty_on_init(new_block); - ut_ad(!block->index); - new_block->index = NULL; - new_block->n_hash_helps = 0; - new_block->n_fields = 1; - new_block->left_side = TRUE; -#endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->page.set_state(buf_page_t::MEMORY)); - /* free block */ - new_block = block; - } - - hash_lock.unlock(); - buf_LRU_block_free_non_file_page(new_block); - return(true); /* free_list was enough */ -} - -void buf_pool_t::io_buf_t::create(ulint n_slots) noexcept -{ - this->n_slots= n_slots; - slots= static_cast - (ut_malloc_nokey(n_slots * sizeof *slots)); - memset((void*) slots, 0, n_slots * sizeof *slots); -} - -void buf_pool_t::io_buf_t::close() noexcept -{ - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - { - aligned_free(s->crypt_buf); - aligned_free(s->comp_buf); - } - ut_free(slots); - slots= nullptr; - n_slots= 0; -} - -buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve(bool wait_for_reads) noexcept -{ - for (;;) - { - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - if (s->acquire()) - return s; - buf_dblwr.flush_buffered_writes(); - os_aio_wait_until_no_pending_writes(true); - if (!wait_for_reads) - continue; - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - if (s->acquire()) - return s; - os_aio_wait_until_no_pending_reads(true); - } -} - -/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status -to the specified string. The format and the following parameters are the -same as the ones used for printf(3). -@param[in] fmt format -@param[in] ... extra parameters according to fmt */ -static -void -buf_resize_status( - const char* fmt, - ...) -{ - va_list ap; - - va_start(ap, fmt); - - vsnprintf( - export_vars.innodb_buffer_pool_resize_status, - sizeof(export_vars.innodb_buffer_pool_resize_status), - fmt, ap); - - va_end(ap); - - ib::info() << export_vars.innodb_buffer_pool_resize_status; -} - -/** Withdraw blocks from the buffer pool until meeting withdraw_target. -@return whether retry is needed */ -inline bool buf_pool_t::withdraw_blocks() noexcept -{ - buf_block_t* block; - ulint loop_count = 0; - - ib::info() << "Start to withdraw the last " - << withdraw_target << " blocks."; - - while (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - - /* try to withdraw from free_list */ - ulint count1 = 0; - - mysql_mutex_lock(&mutex); - buf_buddy_condense_free(); - block = reinterpret_cast( - UT_LIST_GET_FIRST(free)); - while (block != NULL - && UT_LIST_GET_LEN(withdraw) < withdraw_target) { - ut_ad(block->page.in_free_list); - ut_ad(!block->page.oldest_modification()); - ut_ad(!block->page.in_LRU_list); - ut_a(!block->page.in_file()); - - buf_block_t* next_block; - next_block = reinterpret_cast( - UT_LIST_GET_NEXT( - list, &block->page)); - - if (will_be_withdrawn(block->page)) { - /* This should be withdrawn */ - UT_LIST_REMOVE(free, &block->page); - UT_LIST_ADD_LAST(withdraw, &block->page); - ut_d(block->in_withdraw_list = true); - count1++; - } - - block = next_block; - } - - /* reserve free_list length */ - if (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - try_LRU_scan = false; - mysql_mutex_unlock(&mutex); - mysql_mutex_lock(&flush_list_mutex); - page_cleaner_wakeup(true); - my_cond_wait(&done_flush_list, - &flush_list_mutex.m_mutex); - mysql_mutex_unlock(&flush_list_mutex); - mysql_mutex_lock(&mutex); - } - - /* relocate blocks/buddies in withdrawn area */ - ulint count2 = 0; - - buf_pool_mutex_exit_forbid(); - for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage; - bpage; bpage = next_bpage) { - ut_ad(bpage->in_file()); - next_bpage = UT_LIST_GET_NEXT(LRU, bpage); - if (UNIV_LIKELY_NULL(bpage->zip.data) - && will_be_withdrawn(bpage->zip.data) - && bpage->can_relocate()) { - if (!buf_buddy_realloc( - bpage->zip.data, - page_zip_get_size(&bpage->zip))) { - /* failed to allocate block */ - break; - } - count2++; - if (bpage->frame) { - goto realloc_frame; - } - } - - if (bpage->frame && will_be_withdrawn(*bpage) - && bpage->can_relocate()) { -realloc_frame: - if (!realloc(reinterpret_cast( - bpage))) { - /* failed to allocate block */ - break; - } - count2++; - } - } - buf_pool_mutex_exit_allow(); - mysql_mutex_unlock(&mutex); - - buf_resize_status( - "Withdrawing blocks. (" ULINTPF "/" ULINTPF ").", - UT_LIST_GET_LEN(withdraw), - withdraw_target); - - ib::info() << "Withdrew " - << count1 << " blocks from free list." - << " Tried to relocate " << count2 << " blocks (" - << UT_LIST_GET_LEN(withdraw) << "/" - << withdraw_target << ")."; - - if (++loop_count >= 10) { - /* give up for now. - retried after user threads paused. */ - - ib::info() << "will retry to withdraw later"; - - /* need retry later */ - return(true); - } - } - - /* confirm withdrawn enough */ - for (const chunk_t* chunk = chunks + n_chunks_new, - * const echunk = chunks + n_chunks; chunk != echunk; chunk++) { - block = chunk->blocks; - for (ulint j = chunk->size; j--; block++) { - ut_a(block->page.state() == buf_page_t::NOT_USED); - ut_ad(block->in_withdraw_list); - } - } - - ib::info() << "Withdrawn target: " << UT_LIST_GET_LEN(withdraw) - << " blocks."; - - return(false); -} - - - -inline void buf_pool_t::page_hash_table::write_lock_all() noexcept -{ - for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) - { - reinterpret_cast(array[n]).lock(); - if (!n) - break; - } -} - - -inline void buf_pool_t::page_hash_table::write_unlock_all() noexcept -{ - for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) - { - reinterpret_cast(array[n]).unlock(); - if (!n) - break; - } -} - - -namespace -{ - -struct find_interesting_trx -{ - void operator()(const trx_t &trx) - { - if (!trx.is_started()) - return; - if (trx.mysql_thd == nullptr) - return; - if (withdraw_started <= trx.start_time_micro) - return; - - if (!found) - { - sql_print_warning("InnoDB: The following trx might hold " - "the blocks in buffer pool to " - "be withdrawn. Buffer pool " - "resizing can complete only " - "after all the transactions " - "below release the blocks."); - found= true; - } - - lock_trx_print_wait_and_mvcc_state(stderr, &trx, current_time); - } - - bool &found; - /** microsecond_interval_timer() */ - const ulonglong withdraw_started; - const my_hrtime_t current_time; -}; - -} // namespace - -/** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */ -inline void buf_pool_t::resize() -{ - ut_ad(this == &buf_pool); - ut_ad(srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); - - bool warning = false; - - NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - - ut_ad(!resize_in_progress()); - ut_ad(srv_buf_pool_chunk_unit > 0); - - ulint new_instance_size = srv_buf_pool_size >> srv_page_size_shift; - std::ostringstream str_old_size, str_new_size, str_chunk_size; - str_old_size << ib::bytes_iec{srv_buf_pool_old_size}; - str_new_size << ib::bytes_iec{srv_buf_pool_size}; - str_chunk_size << ib::bytes_iec{srv_buf_pool_chunk_unit}; - - buf_resize_status("Resizing buffer pool from %s to %s (unit = %s).", - str_old_size.str().c_str(), - str_new_size.str().c_str(), - str_chunk_size.str().c_str()); - -#ifdef BTR_CUR_HASH_ADAPT - /* disable AHI if needed */ - buf_resize_status("Disabling adaptive hash index."); - - btr_search_s_lock_all(); - const bool btr_search_disabled = btr_search_enabled; - btr_search_s_unlock_all(); - - btr_search_disable(); - - if (btr_search_disabled) { - ib::info() << "disabled adaptive hash index."; - } -#endif /* BTR_CUR_HASH_ADAPT */ - - mysql_mutex_lock(&mutex); - ut_ad(n_chunks_new == n_chunks); - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); - - n_chunks_new = (new_instance_size << srv_page_size_shift) - / srv_buf_pool_chunk_unit; - curr_size = n_chunks_new * chunks->size; - mysql_mutex_unlock(&mutex); - - if (is_shrinking()) { - /* set withdraw target */ - size_t w = 0; - - for (const chunk_t* chunk = chunks + n_chunks_new, - * const echunk = chunks + n_chunks; - chunk != echunk; chunk++) - w += chunk->size; - - ut_ad(withdraw_target == 0); - withdraw_target = w; - } - - buf_resize_status("Withdrawing blocks to be shrunken."); - - ulonglong withdraw_started = microsecond_interval_timer(); - ulonglong message_interval = 60ULL * 1000 * 1000; - ulint retry_interval = 1; - -withdraw_retry: - /* wait for the number of blocks fit to the new size (if needed)*/ - bool should_retry_withdraw = is_shrinking() - && withdraw_blocks(); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - /* abort to resize for shutdown. */ - return; - } - - /* abort buffer pool load */ - buf_load_abort(); - - const ulonglong current_time = microsecond_interval_timer(); - - if (should_retry_withdraw - && current_time - withdraw_started >= message_interval) { - - if (message_interval > 900000000) { - message_interval = 1800000000; - } else { - message_interval *= 2; - } - - bool found= false; - find_interesting_trx f - {found, withdraw_started, my_hrtime_coarse()}; - withdraw_started = current_time; - - /* This is going to exceed the maximum size of a - memory transaction. */ - LockMutexGuard g{SRW_LOCK_CALL}; - trx_sys.trx_list.for_each(f); - } - - if (should_retry_withdraw) { - ib::info() << "Will retry to withdraw " << retry_interval - << " seconds later."; - std::this_thread::sleep_for( - std::chrono::seconds(retry_interval)); - - if (retry_interval > 5) { - retry_interval = 10; - } else { - retry_interval *= 2; - } - - goto withdraw_retry; - } - - buf_resize_status("Latching entire buffer pool."); - -#ifndef DBUG_OFF - { - bool should_wait = true; - - while (should_wait) { - should_wait = false; - DBUG_EXECUTE_IF( - "ib_buf_pool_resize_wait_before_resize", - should_wait = true; - std::this_thread::sleep_for( - std::chrono::milliseconds(10));); - } - } -#endif /* !DBUG_OFF */ - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - /* Indicate critical path */ - resizing.store(true, std::memory_order_relaxed); - - mysql_mutex_lock(&mutex); - page_hash.write_lock_all(); - - chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map()); - - /* add/delete chunks */ - - buf_resize_status("Resizing buffer pool from " - ULINTPF " chunks to " ULINTPF " chunks.", - n_chunks, n_chunks_new); - - if (is_shrinking()) { - /* delete chunks */ - chunk_t* chunk = chunks + n_chunks_new; - const chunk_t* const echunk = chunks + n_chunks; - - ulint sum_freed = 0; - - while (chunk < echunk) { - /* buf_LRU_block_free_non_file_page() invokes - MEM_NOACCESS() on any buf_pool.free blocks. - We must cancel the effect of that. In - MemorySanitizer, MEM_NOACCESS() is no-op, so - we must not do anything special for it here. */ -#ifdef HAVE_valgrind -# if !__has_feature(memory_sanitizer) - MEM_MAKE_DEFINED(chunk->mem, chunk->mem_size()); -# endif -#else - MEM_MAKE_ADDRESSABLE(chunk->mem, chunk->size); -#endif - - buf_block_t* block = chunk->blocks; - - for (ulint j = chunk->size; j--; block++) { - block->page.lock.free(); - } - - allocator.deallocate_large_dodump( - chunk->mem, &chunk->mem_pfx); - sum_freed += chunk->size; - ++chunk; - } - - /* discard withdraw list */ - UT_LIST_INIT(withdraw, &buf_page_t::list); - withdraw_target = 0; - - ib::info() << n_chunks - n_chunks_new - << " Chunks (" << sum_freed - << " blocks) were freed."; - - n_chunks = n_chunks_new; - } - - { - /* reallocate chunks */ - const size_t new_chunks_size - = n_chunks_new * sizeof(chunk_t); - - chunk_t* new_chunks = static_cast( - ut_zalloc_nokey_nofatal(new_chunks_size)); - - DBUG_EXECUTE_IF("buf_pool_resize_chunk_null", - ut_free(new_chunks); new_chunks= nullptr; ); - - if (!new_chunks) { - ib::error() << "failed to allocate" - " the chunk array."; - n_chunks_new = n_chunks; - warning = true; - chunks_old = NULL; - goto calc_buf_pool_size; - } - - ulint n_chunks_copy = ut_min(n_chunks_new, n_chunks); - - memcpy(new_chunks, chunks, - n_chunks_copy * sizeof *new_chunks); - - for (ulint j = 0; j < n_chunks_copy; j++) { - new_chunks[j].reg(); - } - - chunks_old = chunks; - chunks = new_chunks; - } - - if (n_chunks_new > n_chunks) { - /* add chunks */ - ulint sum_added = 0; - ulint n = n_chunks; - const size_t unit = srv_buf_pool_chunk_unit; - - for (chunk_t* chunk = chunks + n_chunks, - * const echunk = chunks + n_chunks_new; - chunk != echunk; chunk++) { - if (!chunk->create(unit)) { - ib::error() << "failed to allocate" - " memory for buffer pool chunk"; - - warning = true; - n_chunks_new = n_chunks; - break; - } - - sum_added += chunk->size; - ++n; - } - - ib::info() << n_chunks_new - n_chunks - << " chunks (" << sum_added - << " blocks) were added."; - - n_chunks = n; - } -calc_buf_pool_size: - /* recalc curr_size */ - ulint new_size = 0; - - { - chunk_t* chunk = chunks; - const chunk_t* const echunk = chunk + n_chunks; - do { - new_size += chunk->size; - } while (++chunk != echunk); - } - - curr_size = new_size; - n_chunks_new = n_chunks; - - if (chunks_old) { - ut_free(chunks_old); - chunks_old = NULL; - } - - chunk_t::map* chunk_map_old = chunk_t::map_ref; - chunk_t::map_ref = chunk_t::map_reg; - - /* set size */ - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); - ulint s= curr_size; - s/= BUF_READ_AHEAD_PORTION; - read_ahead_area= s >= READ_AHEAD_PAGES - ? READ_AHEAD_PAGES - : my_round_up_to_next_power(static_cast(s)); - curr_pool_size= n_chunks * srv_buf_pool_chunk_unit; - srv_buf_pool_curr_size= curr_pool_size;/* FIXME: remove*/ - extern ulonglong innobase_buffer_pool_size; - innobase_buffer_pool_size= buf_pool_size_align(srv_buf_pool_curr_size); - - const bool new_size_too_diff - = srv_buf_pool_base_size > srv_buf_pool_size * 2 - || srv_buf_pool_base_size * 2 < srv_buf_pool_size; - - mysql_mutex_unlock(&mutex); - page_hash.write_unlock_all(); - - UT_DELETE(chunk_map_old); - - resizing.store(false, std::memory_order_relaxed); - - /* Normalize other components, if the new size is too different */ - if (!warning && new_size_too_diff) { - srv_buf_pool_base_size = srv_buf_pool_size; - - buf_resize_status("Resizing other hash tables."); - - srv_lock_table_size = 5 - * (srv_buf_pool_size >> srv_page_size_shift); - lock_sys.resize(srv_lock_table_size); - dict_sys.resize(); - - ib::info() << "Resized hash tables: lock_sys," -#ifdef BTR_CUR_HASH_ADAPT - " adaptive hash index," -#endif /* BTR_CUR_HASH_ADAPT */ - " and dictionary."; - } - - /* normalize ibuf.max_size */ - ibuf_max_size_update(srv_change_buffer_max_size); - - if (srv_buf_pool_old_size != srv_buf_pool_size) { - - buf_resize_status("Completed resizing buffer pool from %zu to %zu bytes." - ,srv_buf_pool_old_size, srv_buf_pool_size); - srv_buf_pool_old_size = srv_buf_pool_size; - } - -#ifdef BTR_CUR_HASH_ADAPT - /* enable AHI if needed */ - if (btr_search_disabled) { - btr_search_enable(true); - ib::info() << "Re-enabled adaptive hash index."; - } -#endif /* BTR_CUR_HASH_ADAPT */ - - if (warning) - buf_resize_status("Resizing buffer pool failed"); - - ut_d(validate()); - - return; -} - -#ifdef __linux__ -inline void buf_pool_t::garbage_collect() +inline void buf_pool_t::garbage_collect() noexcept { mysql_mutex_lock(&mutex); size_t freed= 0; @@ -2205,67 +1110,993 @@ rescan: freed); return; } -#endif /* __linux__ */ +#endif -/** Thread pool task invoked by innodb_buffer_pool_size changes. */ -static void buf_resize_callback(void *) +#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) +/** Enable buffers to be dumped to core files. + +A convenience function, not called anyhwere directly however +it is left available for gdb or any debugger to call +in the event that you want all of the memory to be dumped +to a core file. + +@return number of errors found in madvise() calls */ +MY_ATTRIBUTE((used)) +int buf_pool_t::madvise_do_dump() noexcept { - DBUG_ENTER("buf_resize_callback"); - ut_ad(srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); - mysql_mutex_lock(&buf_pool.mutex); - const auto size= srv_buf_pool_size; - const bool work= srv_buf_pool_old_size != size; - mysql_mutex_unlock(&buf_pool.mutex); + int ret= 0; - if (work) - buf_pool.resize(); + /* mirrors allocation in log_t::create() */ + if (log_sys.buf) { + ret += madvise(log_sys.buf, log_sys.buf_size, MADV_DODUMP); + ret += madvise(log_sys.flush_buf, log_sys.buf_size, + MADV_DODUMP); + } + + ret+= madvise(buf_pool.memory, buf_pool.size_in_bytes, MADV_DODUMP); + return ret; +} +#endif + +#ifndef UNIV_DEBUG +static inline byte hex_to_ascii(byte hex_digit) noexcept +{ + const int offset= hex_digit <= 9 ? '0' : 'a' - 10; + return byte(hex_digit + offset); +} +#endif + +/** Dump a page to stderr. +@param[in] read_buf database page +@param[in] zip_size compressed page size, or 0 */ +ATTRIBUTE_COLD +void buf_page_print(const byte *read_buf, ulint zip_size) noexcept +{ +#ifndef UNIV_DEBUG + const size_t size = zip_size ? zip_size : srv_page_size; + const byte * const end= read_buf + size; + sql_print_information("InnoDB: Page dump (%zu bytes):", size); + + do + { + byte row[64]; + + for (byte *r= row; r != &row[64]; r+= 2, read_buf++) + { + r[0]= hex_to_ascii(byte(*read_buf >> 4)); + r[1]= hex_to_ascii(*read_buf & 15); + } + + sql_print_information("InnoDB: %.*s", 64, row); + } + while (read_buf != end); + + sql_print_information("InnoDB: End of page dump"); +#endif +} + +IF_DBUG(,inline) byte *buf_block_t::frame_address() const noexcept +{ + static_assert(ut_is_2pow(innodb_buffer_pool_extent_size), ""); + + byte *frame_= reinterpret_cast + ((reinterpret_cast(this) & ~(innodb_buffer_pool_extent_size - 1)) | + first_frame_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]); + ut_ad(reinterpret_cast(this) + sizeof(*this) <= frame_); + frame_+= + (((reinterpret_cast(this) & (innodb_buffer_pool_extent_size - 1)) / + sizeof(*this)) << srv_page_size_shift); + return frame_; +} + +buf_block_t *buf_pool_t::block_from(const void *ptr) noexcept +{ + static_assert(ut_is_2pow(innodb_buffer_pool_extent_size), ""); + ut_ad(static_cast(ptr) >= buf_pool.memory); + + byte *first_block= reinterpret_cast + (reinterpret_cast(ptr) & ~(innodb_buffer_pool_extent_size - 1)); + const size_t first_frame= + first_frame_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + + ut_ad(static_cast(ptr) >= first_block + first_frame); + return reinterpret_cast(first_block) + + (((size_t(ptr) & (innodb_buffer_pool_extent_size - 1)) - first_frame) >> + srv_page_size_shift); +} + +/** Determine the address of the first invalid block descriptor +@param n_blocks buf_pool.n_blocks +@return offset of the first invalid buf_block_t, relative to buf_pool.memory */ +static size_t block_descriptors_in_bytes(size_t n_blocks) noexcept +{ + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + const size_t extent_size= pages_in_extent[ssize]; + return n_blocks / extent_size * innodb_buffer_pool_extent_size + + (n_blocks % extent_size) * sizeof(buf_block_t); +} + +buf_block_t *buf_pool_t::get_nth_page(size_t pos) const noexcept +{ + mysql_mutex_assert_owner(&mutex); + ut_ad(pos < n_blocks); + return reinterpret_cast + (memory + block_descriptors_in_bytes(pos)); +} + +buf_block_t *buf_pool_t::allocate() noexcept +{ + mysql_mutex_assert_owner(&mutex); + + while (buf_page_t *b= UT_LIST_GET_FIRST(free)) + { + ut_ad(b->in_free_list); + ut_d(b->in_free_list = FALSE); + ut_ad(!b->oldest_modification()); + ut_ad(!b->in_LRU_list); + ut_a(!b->in_file()); + UT_LIST_REMOVE(free, b); + + if (UNIV_LIKELY(!n_blocks_to_withdraw) || !withdraw(*b)) + { + /* No adaptive hash index entries may point to a free block. */ + assert_block_ahi_empty(reinterpret_cast(b)); + b->set_state(buf_page_t::MEMORY); + b->set_os_used(); + return reinterpret_cast(b); + } + } + + return nullptr; +} + +/** Create the hash table. +@param n the lower bound of n_cells */ +void buf_pool_t::page_hash_table::create(ulint n) noexcept +{ + n_cells= ut_find_prime(n); + const size_t size= MY_ALIGN(pad(n_cells) * sizeof *array, + CPU_LEVEL1_DCACHE_LINESIZE); + void *v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); + memset_aligned(v, 0, size); + array= static_cast(v); +} + +size_t buf_pool_t::get_n_blocks(size_t size_in_bytes) noexcept +{ + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + size_t n_blocks_alloc= size_in_bytes / innodb_buffer_pool_extent_size * + pages_in_extent[ssize]; + + if (const size_t incomplete_extent_pages= + (size_in_bytes & (innodb_buffer_pool_extent_size - 1)) >> + srv_page_size_shift) + { + ssize_t d= incomplete_extent_pages - first_page_in_extent[ssize]; + ut_ad(d > 0); + n_blocks_alloc+= d; + } + + return n_blocks_alloc; +} + +size_t buf_pool_t::blocks_in_bytes(size_t n_blocks) noexcept +{ + const size_t shift{srv_page_size_shift}; + const size_t ssize{shift - UNIV_PAGE_SIZE_SHIFT_MIN}; + const size_t extent_size= pages_in_extent[ssize]; + size_t size_in_bytes= n_blocks / extent_size * + innodb_buffer_pool_extent_size; + if (size_t remainder= n_blocks % extent_size) + size_in_bytes+= (remainder + first_page_in_extent[ssize]) << shift; + ut_ad(get_n_blocks(size_in_bytes) == n_blocks); + return size_in_bytes; +} + +/** Create the buffer pool. +@return whether the creation failed */ +bool buf_pool_t::create() noexcept +{ + ut_ad(this == &buf_pool); + ut_ad(!is_initialised()); + ut_ad(size_in_bytes_requested > 0); + ut_ad(!(size_in_bytes_max & (innodb_buffer_pool_extent_size - 1))); + ut_ad(!(size_in_bytes_requested & ((1U << 20) - 1))); + ut_ad(size_in_bytes_requested <= size_in_bytes_max); + /* mariabackup loads tablespaces, and it requires field_ref_zero to be + allocated before innodb initialization */ + ut_ad(srv_operation >= SRV_OPERATION_RESTORE || !field_ref_zero); + + if (!field_ref_zero) + { + if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) + { + field_ref_zero= static_cast + (memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX)); + goto init; + } + + oom: + ut_ad(!is_initialised()); + sql_print_error("InnoDB: Cannot map innodb_buffer_pool_size_max=%zum", + size_in_bytes_max >> 20); + return true; + } + + init: + DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", goto oom;); + size_t size= size_in_bytes_max; + sql_print_information("InnoDB: innodb_buffer_pool_size_max=%zum," + " innodb_buffer_pool_size=%zum", + size >> 20, size_in_bytes_requested >> 20); + + retry: + { + NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; + memory_unaligned= my_virtual_mem_reserve(&size); + } + + if (!memory_unaligned) + goto oom; + + const size_t alignment_waste= + ((~size_t(memory_unaligned) & (innodb_buffer_pool_extent_size - 1)) + 1) & + (innodb_buffer_pool_extent_size - 1); + + if (size < size_in_bytes_max + alignment_waste) + { + my_virtual_mem_release(memory_unaligned, size); + size+= 1 + + (~size_t(memory_unaligned) & (innodb_buffer_pool_extent_size - 1)); + goto retry; + } + + MEM_UNDEFINED(memory_unaligned, size); + ut_dontdump(memory_unaligned, size, true); + memory= memory_unaligned + alignment_waste; + size_unaligned= size; + size-= alignment_waste; + size&= ~(innodb_buffer_pool_extent_size - 1); + + const size_t actual_size= size_in_bytes_requested; + ut_ad(actual_size <= size); + + size_in_bytes= actual_size; + os_total_large_mem_allocated+= actual_size; + +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, actual_size, &owner); +#endif + if (!my_virtual_mem_commit(memory, actual_size)) + { + my_virtual_mem_release(memory_unaligned, size_unaligned); + memory= nullptr; + memory_unaligned= nullptr; + goto oom; + } + +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) + { + struct bitmask *numa_mems_allowed= numa_get_mems_allowed(); + MEM_MAKE_DEFINED(numa_mems_allowed, sizeof *numa_mems_allowed); + if (mbind(memory_unaligned, size_unaligned, MPOL_INTERLEAVE, + numa_mems_allowed->maskp, numa_mems_allowed->size, + MPOL_MF_MOVE)) + sql_print_warning("InnoDB: Failed to set NUMA memory policy of" + " buffer pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + numa_bitmask_free(numa_mems_allowed); + } +#endif /* HAVE_LIBNUMA */ + + n_blocks= get_n_blocks(actual_size); + n_blocks_to_withdraw= 0; + UT_LIST_INIT(free, &buf_page_t::list); + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + { + buf_block_t *block= reinterpret_cast(extent); + const buf_block_t *extent_end= block + pages_in_extent[ssize]; + if (reinterpret_cast(extent_end) > end) + extent_end= reinterpret_cast(end); + MEM_MAKE_DEFINED(block, (extent_end - block) * sizeof *block); + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize]; + block < extent_end; block++, frame+= srv_page_size) + { + ut_ad(!memcmp(block, field_ref_zero, sizeof *block)); + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + } + +#if defined(__aarch64__) + mysql_mutex_init(buf_pool_mutex_key, &mutex, MY_MUTEX_INIT_FAST); +#else + mysql_mutex_init(buf_pool_mutex_key, &mutex, nullptr); +#endif + + UT_LIST_INIT(withdrawn, &buf_page_t::list); + UT_LIST_INIT(LRU, &buf_page_t::LRU); + UT_LIST_INIT(flush_list, &buf_page_t::list); + UT_LIST_INIT(unzip_LRU, &buf_block_t::unzip_LRU); + + for (size_t i= 0; i < UT_ARR_SIZE(zip_free); ++i) + UT_LIST_INIT(zip_free[i], &buf_buddy_free_t::list); + ulint s= n_blocks; + s/= BUF_READ_AHEAD_PORTION; + read_ahead_area= s >= READ_AHEAD_PAGES + ? READ_AHEAD_PAGES + : my_round_up_to_next_power(static_cast(s)); + + page_hash.create(2 * n_blocks); + last_printout_time= time(nullptr); + + mysql_mutex_init(flush_list_mutex_key, &flush_list_mutex, + MY_MUTEX_INIT_FAST); + + pthread_cond_init(&done_flush_LRU, nullptr); + pthread_cond_init(&done_flush_list, nullptr); + pthread_cond_init(&do_flush_list, nullptr); + pthread_cond_init(&done_free, nullptr); + + try_LRU_scan= true; + + ut_d(flush_hp.m_mutex= &flush_list_mutex;); + ut_d(lru_hp.m_mutex= &mutex); + ut_d(lru_scan_itr.m_mutex= &mutex); + + io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) * + OS_AIO_N_PENDING_IOS_PER_THREAD); + + last_activity_count= srv_get_activity_count(); + + buf_LRU_old_ratio_update(100 * 3 / 8, false); + btr_search_sys_create(); + +#ifdef __linux__ + if (srv_operation == SRV_OPERATION_NORMAL) + buf_mem_pressure_detect_init(); +#endif + ut_ad(is_initialised()); + sql_print_information("InnoDB: Completed initialization of buffer pool"); + return false; +} + +/** Clean up after successful create() */ +void buf_pool_t::close() noexcept +{ + ut_ad(this == &buf_pool); + if (!is_initialised()) + return; + + mysql_mutex_destroy(&mutex); + mysql_mutex_destroy(&flush_list_mutex); + + for (buf_page_t *bpage= UT_LIST_GET_LAST(LRU), *prev_bpage= nullptr; bpage; + bpage= prev_bpage) + { + prev_bpage= UT_LIST_GET_PREV(LRU, bpage); + ut_ad(bpage->in_file()); + ut_ad(bpage->in_LRU_list); + /* The buffer pool must be clean during normal shutdown. + Only on aborted startup (with recovery) or with innodb_fast_shutdown=2 + we may discard changes. */ + ut_d(const lsn_t oldest= bpage->oldest_modification();) + ut_ad(fsp_is_system_temporary(bpage->id().space()) + ? (oldest == 0 || oldest == 2) + : oldest <= 1 || srv_is_being_started || srv_fast_shutdown == 2); + + if (UNIV_UNLIKELY(!bpage->frame)) + { + bpage->lock.free(); + ut_free(bpage); + } + } + + { + const size_t size{size_in_bytes}; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + MEM_MAKE_DEFINED(&block->page.lock, sizeof &block->page.lock); + block->page.lock.free(); + } + + ut_dodump(memory_unaligned, size_unaligned); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, size, owner); + owner= nullptr; +#endif + os_total_large_mem_allocated-= size; + my_virtual_mem_decommit(memory, size); + my_virtual_mem_release(memory_unaligned, size_unaligned); + memory= nullptr; + memory_unaligned= nullptr; + } + + pthread_cond_destroy(&done_flush_LRU); + pthread_cond_destroy(&done_flush_list); + pthread_cond_destroy(&do_flush_list); + pthread_cond_destroy(&done_free); + + page_hash.free(); + + io_buf.close(); + aligned_free(const_cast(field_ref_zero)); + field_ref_zero= nullptr; +} + +void buf_pool_t::io_buf_t::create(ulint n_slots) noexcept +{ + this->n_slots= n_slots; + slots= static_cast + (ut_malloc_nokey(n_slots * sizeof *slots)); + memset((void*) slots, 0, n_slots * sizeof *slots); +} + +void buf_pool_t::io_buf_t::close() noexcept +{ + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + { + aligned_free(s->crypt_buf); + aligned_free(s->comp_buf); + } + ut_free(slots); + slots= nullptr; + n_slots= 0; +} + +buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve(bool wait_for_reads) noexcept +{ + for (;;) + { + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + if (s->acquire()) + return s; + buf_dblwr.flush_buffered_writes(); + os_aio_wait_until_no_pending_writes(true); + if (!wait_for_reads) + continue; + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + if (s->acquire()) + return s; + os_aio_wait_until_no_pending_reads(true); + } +} + +ATTRIBUTE_COLD bool buf_pool_t::withdraw(buf_page_t &bpage) noexcept +{ + mysql_mutex_assert_owner(&mutex); + ut_ad(n_blocks_to_withdraw); + ut_ad(first_to_withdraw); + ut_ad(!bpage.zip.data); + if (&bpage < first_to_withdraw) + return false; + n_blocks_to_withdraw--; + bpage.lock.free(); + UT_LIST_ADD_LAST(withdrawn, &bpage); + return true; +} + +ATTRIBUTE_COLD buf_pool_t::shrink_status buf_pool_t::shrink(size_t size) + noexcept +{ + mysql_mutex_assert_owner(&mutex); + buf_load_abort(); + + if (!n_blocks_to_withdraw) + { + withdraw_done: + first_to_withdraw= nullptr; + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) + { + UT_LIST_REMOVE(withdrawn, b); + /* satisfy the check in lazy_allocate() */ + ut_d(memset((void*) b, 0, sizeof(buf_block_t))); + } + return SHRINK_DONE; + } + + buf_buddy_condense_free(size); + + for (buf_page_t *b= UT_LIST_GET_FIRST(free), *next; b; b= next) + { + ut_ad(b->in_free_list); + ut_ad(!b->in_LRU_list); + ut_ad(!b->zip.data); + ut_ad(!b->oldest_modification()); + ut_a(b->state() == buf_page_t::NOT_USED); + + next= UT_LIST_GET_NEXT(list, b); + + if (b >= first_to_withdraw) + { + UT_LIST_REMOVE(free, b); + b->lock.free(); + UT_LIST_ADD_LAST(withdrawn, b); + if (!--n_blocks_to_withdraw) + goto withdraw_done; + } + } + + buf_block_t *block= allocate(); + size_t scanned= 0; + for (buf_page_t *b= lru_scan_itr.start(), *prev; block && b; b= prev) + { + ut_ad(b->in_LRU_list); + ut_a(b->in_file()); + + prev= UT_LIST_GET_PREV(LRU, b); + + if (!b->can_relocate()) + { + next: + if (++scanned & 31) + continue; + /* Avoid starvation by periodically releasing buf_pool.mutex. */ + lru_scan_itr.set(prev); + mysql_mutex_unlock(&mutex); + mysql_mutex_lock(&mutex); + prev= lru_scan_itr.get(); + continue; + } + + const page_id_t id{b->id()}; + hash_chain &chain= page_hash.cell_get(id.fold()); + page_hash_latch &hash_lock= page_hash.lock_get(chain); + hash_lock.lock(); + + { + /* relocate flush_list and b->page.zip */ + bool have_flush_list_mutex= false; + + switch (b->oldest_modification()) { + case 2: + ut_ad(fsp_is_system_temporary(id.space())); + /* fall through */ + case 0: + break; + default: + mysql_mutex_lock(&flush_list_mutex); + switch (ut_d(lsn_t om=) b->oldest_modification()) { + case 1: + delete_from_flush_list(b); + /* fall through */ + case 0: + mysql_mutex_unlock(&flush_list_mutex); + break; + default: + ut_ad(om != 2); + have_flush_list_mutex= true; + } + } + + if (!b->can_relocate()) + { + next_quick: + if (have_flush_list_mutex) + mysql_mutex_unlock(&flush_list_mutex); + hash_lock.unlock(); + continue; + } + + if (UNIV_LIKELY_NULL(b->zip.data) && + will_be_withdrawn(b->zip.data, size)) + { + block= buf_buddy_shrink(b, block); + ut_ad(mach_read_from_4(b->zip.data + FIL_PAGE_OFFSET) == id.page_no()); + if (UNIV_UNLIKELY(!n_blocks_to_withdraw)) + { + if (have_flush_list_mutex) + mysql_mutex_unlock(&flush_list_mutex); + hash_lock.unlock(); + if (block) + buf_LRU_block_free_non_file_page(block); + goto withdraw_done; + } + if (!block && !(block= allocate())) + goto next_quick; + } + + if (!b->frame || b < first_to_withdraw) + goto next_quick; + + ut_ad(is_uncompressed_current(b)); + + byte *const frame= block->page.frame; + memcpy_aligned<4096>(frame, b->frame, srv_page_size); + b->lock.free(); + block->page.lock.free(); + new(&block->page) buf_page_t(*b); + block->page.frame= frame; + + if (have_flush_list_mutex) + { + buf_flush_relocate_on_flush_list(b, &block->page); + mysql_mutex_unlock(&flush_list_mutex); + } + } + + /* relocate LRU list */ + if (buf_page_t *prev_b= LRU_remove(b)) + UT_LIST_INSERT_AFTER(LRU, prev_b, &block->page); + else + UT_LIST_ADD_FIRST(LRU, &block->page); + + if (LRU_old == b) + LRU_old= &block->page; + + ut_ad(block->page.in_LRU_list); + + /* relocate page_hash */ + ut_ad(b == page_hash.get(id, chain)); + page_hash.replace(chain, b, &block->page); + + if (b->zip.data) + { + ut_ad(mach_read_from_4(b->zip.data + FIL_PAGE_OFFSET) == id.page_no()); + b->zip.data= nullptr; + /* relocate unzip_LRU list */ + buf_block_t *old_block= reinterpret_cast(b); + ut_ad(old_block->in_unzip_LRU_list); + ut_d(old_block->in_unzip_LRU_list= false); + ut_d(block->in_unzip_LRU_list= true); + + buf_block_t *prev= UT_LIST_GET_PREV(unzip_LRU, old_block); + UT_LIST_REMOVE(unzip_LRU, old_block); + + if (prev) + UT_LIST_INSERT_AFTER(unzip_LRU, prev, block); + else + UT_LIST_ADD_FIRST(unzip_LRU, block); + } + + buf_block_modify_clock_inc(block); + +#ifdef BTR_CUR_HASH_ADAPT + assert_block_ahi_empty_on_init(block); + block->index= nullptr; + block->n_hash_helps= 0; + block->n_fields= 1; + block->left_side= true; +#endif /* BTR_CUR_HASH_ADAPT */ + hash_lock.unlock(); + + ut_d(b->in_LRU_list= false); + + b->set_state(buf_page_t::NOT_USED); + UT_LIST_ADD_LAST(withdrawn, b); + if (!--n_blocks_to_withdraw) + goto withdraw_done; + + block= allocate(); + goto next; + } + + mysql_mutex_lock(&flush_list_mutex); + + if (LRU_warned && !UT_LIST_GET_FIRST(free)) + { + LRU_warned_clear(); + mysql_mutex_unlock(&flush_list_mutex); + return SHRINK_ABORT; + } + + try_LRU_scan= false; + mysql_mutex_unlock(&mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); + mysql_mutex_lock(&mutex); + + if (!n_blocks_to_withdraw) + goto withdraw_done; + + return SHRINK_IN_PROGRESS; +} + +inline void buf_pool_t::shrunk(size_t size, size_t reduced) noexcept +{ + ut_ad(size + reduced == size_in_bytes); + size_in_bytes_requested= size; + size_in_bytes= size; +# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* Only page_guess() may read this memory, which after + my_virtual_mem_decommit() may be zeroed out or preserve its original + contents. Try to catch any unintended reads outside page_guess(). */ + MEM_UNDEFINED(memory + size, size_in_bytes_max - size); +# else + for (size_t n= page_hash.pad(page_hash.n_cells), i= 0; i < n; + i+= page_hash.ELEMENTS_PER_LATCH + 1) + { + auto &latch= reinterpret_cast(page_hash.array[i]); + latch.lock(); + /* We already shrunk size_in_bytes. The exclusive lock here + ensures that any page_guess() will detect an out-of-bounds + guess before we invoke my_virtual_mem_decommit() below. */ + latch.unlock(); + } +# endif + my_virtual_mem_decommit(memory + size, reduced); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, reduced, owner); +#endif +} + +ATTRIBUTE_COLD void buf_pool_t::resize(size_t size, THD *thd) noexcept +{ + ut_ad(this == &buf_pool); + mysql_mutex_assert_owner(&LOCK_global_system_variables); + ut_ad(size <= size_in_bytes_max); + if (my_use_large_pages) + { + my_error(ER_VARIABLE_IS_READONLY, MYF(0), "InnoDB", + "innodb_buffer_pool_size", "large_pages=0"); + return; + } + + size_t n_blocks_new= get_n_blocks(size); + + mysql_mutex_lock(&mutex); + + const size_t old_size= size_in_bytes; + if (first_to_withdraw || old_size != size_in_bytes_requested) + { + mysql_mutex_unlock(&mutex); + my_printf_error(ER_WRONG_USAGE, + "innodb_buffer_pool_size change is already in progress", + MYF(0)); + return; + } + + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); +#ifdef __linux__ + DBUG_EXECUTE_IF("trigger_garbage_collection", + mem_pressure_obj.trigger_collection();); +#endif + + if (size == old_size) + { + mysql_mutex_unlock(&mutex); + DBUG_EXECUTE_IF("trigger_garbage_collection", + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + garbage_collect();); + return; + } + +#ifdef BTR_CUR_HASH_ADAPT + bool ahi_disabled= false; +#endif + + const bool significant_change= + n_blocks_new > n_blocks * 2 || n_blocks > n_blocks_new * 2; + const ssize_t n_blocks_removed= n_blocks - n_blocks_new; + + if (n_blocks_removed <= 0) + { + if (!my_virtual_mem_commit(memory + old_size, size - old_size)) + { + mysql_mutex_unlock(&mutex); + sql_print_error("InnoDB: Cannot commit innodb_buffer_pool_size=%zum;" + " retaining innodb_buffer_pool_size=%zum", + size >> 20, old_size >> 20); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return; + } + + size_in_bytes_requested= size; + size_in_bytes= size; + + { + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + const size_t pages= pages_in_extent[ssize]; + const size_t first_extent= n_blocks / pages; + + char *extent= memory + first_extent * innodb_buffer_pool_extent_size; + + buf_block_t *block= reinterpret_cast(extent); + if (const size_t first_blocks= n_blocks % pages) + { + /* Extend the last (partial) extent until its end */ + const buf_block_t *extent_end= block + + (first_extent == (n_blocks_new / pages) + ? (n_blocks_new % pages) + : pages); + block+= first_blocks; + memset((void*) block, 0, (extent_end - block) * sizeof *block); + + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize] + + (first_blocks << srv_page_size_shift); block < extent_end; + block++, frame+= srv_page_size) + { + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + extent+= innodb_buffer_pool_extent_size; + } + + /* Fill in further extents; @see buf_pool_t::create() */ + for (const char *const end_new= memory + + block_descriptors_in_bytes(n_blocks_new); + extent < end_new; extent+= innodb_buffer_pool_extent_size) + { + block= reinterpret_cast(extent); + const buf_block_t *extent_end= block + pages; + if (reinterpret_cast(extent_end) > end_new) + extent_end= reinterpret_cast(end_new); + + memset((void*) block, 0, (extent_end - block) * sizeof *block); + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize]; + block < extent_end; block++, frame+= srv_page_size) + { + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + } + } + + mysql_mutex_unlock(&LOCK_global_system_variables); + resized: + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); + ut_ad(!first_to_withdraw); + const size_t old_blocks{n_blocks}; + n_blocks= n_blocks_new; + + size_t s= n_blocks_new / BUF_READ_AHEAD_PORTION; + read_ahead_area= s >= READ_AHEAD_PAGES + ? READ_AHEAD_PAGES + : my_round_up_to_next_power(uint32(s)); + + if (ssize_t d= size - old_size) + { + os_total_large_mem_allocated+= d; + if (d > 0) + { + /* Already committed memory earlier */ + ut_ad(n_blocks_removed <= 0); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, d, &owner); +#endif + } + else + shrunk(size, size_t(-d)); + } + + mysql_mutex_unlock(&mutex); + + if (significant_change) + { + sql_print_information("InnoDB: Resizing hash tables"); + srv_lock_table_size= 5 * n_blocks_new; + lock_sys.resize(srv_lock_table_size); + dict_sys.resize(); + } + + ibuf_max_size_update(srv_change_buffer_max_size); +#ifdef BTR_CUR_HASH_ADAPT + if (ahi_disabled) + btr_search_enable(true); +#endif + mysql_mutex_lock(&LOCK_global_system_variables); + bool resized= n_blocks_removed < 0; + if (n_blocks_removed > 0) + { + mysql_mutex_lock(&mutex); + resized= size_in_bytes == old_size; + if (resized) + { + size_in_bytes_requested= size; + size_in_bytes= size; + } + mysql_mutex_unlock(&mutex); + } + + if (resized) + sql_print_information("InnoDB: innodb_buffer_pool_size=%zum (%zu pages)" + " resized from %zum (%zu pages)", + size >> 20, n_blocks_new, old_size >> 20, + old_blocks); + } else { - std::ostringstream sout; - sout << "Size did not change: old size = new size = " << size; - buf_resize_status(sout.str().c_str()); + size_t to_withdraw= size_t(n_blocks_removed); + n_blocks_to_withdraw= to_withdraw; + first_to_withdraw= &get_nth_page(n_blocks_new)->page; + size_in_bytes_requested= size; + mysql_mutex_unlock(&LOCK_global_system_variables); + + mysql_mutex_unlock(&mutex); + DEBUG_SYNC_C("buf_pool_shrink_before_wakeup"); + mysql_mutex_lock(&flush_list_mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); +#ifdef BTR_CUR_HASH_ADAPT + ahi_disabled= btr_search_disable(); +#endif /* BTR_CUR_HASH_ADAPT */ + mysql_mutex_lock(&mutex); + + time_t last_message= 0; + + do + { + time_t now= time(nullptr); + if (now - last_message > 15) + { + if (last_message != 0 && to_withdraw == n_blocks_to_withdraw) + break; + to_withdraw= n_blocks_to_withdraw; + last_message= now; + sql_print_information("InnoDB: Trying to shrink" + " innodb_buffer_pool_size=%zum (%zu pages)" + " from %zum (%zu pages, to withdraw %zu)", + size >> 20, n_blocks_new, + old_size >> 20, n_blocks, to_withdraw); + } + shrink_status s{shrink(size)}; + if (s == SHRINK_DONE) + goto resized; + if (s != SHRINK_IN_PROGRESS) + break; + } + while (!thd_kill_level(thd)); + + ut_ad(size_in_bytes > size_in_bytes_requested); + n_blocks_to_withdraw= 0; + first_to_withdraw= nullptr; + size_in_bytes_requested= size_in_bytes; + + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) + { + UT_LIST_REMOVE(withdrawn, b); + UT_LIST_ADD_LAST(free, b); + ut_d(b->in_free_list= true); + ut_ad(b->state() == buf_page_t::NOT_USED); + b->lock.init(); + } + + mysql_mutex_unlock(&mutex); + my_printf_error(ER_WRONG_USAGE, "innodb_buffer_pool_size change aborted", + MYF(ME_ERROR_LOG)); + mysql_mutex_lock(&LOCK_global_system_variables); } - DBUG_VOID_RETURN; + + ut_d(validate()); } -/* Ensure that task does not run in parallel, by setting max_concurrency to 1 for the thread group */ -static tpool::task_group single_threaded_group(1); -static tpool::waitable_task buf_resize_task(buf_resize_callback, - nullptr, &single_threaded_group); - -void buf_resize_start() -{ -#if !defined(DBUG_OFF) && defined(__linux__) - DBUG_EXECUTE_IF("trigger_garbage_collection", - { - mem_pressure_obj.trigger_collection(); - } - ); -#endif - - srv_thread_pool->submit_task(&buf_resize_task); -} - -void buf_resize_shutdown() -{ -#ifdef __linux__ - buf_mem_pressure_shutdown(); -#endif - buf_resize_task.wait(); -} - - /** Relocate a ROW_FORMAT=COMPRESSED block in the LRU list and buf_pool.page_hash. The caller must relocate bpage->list. @param bpage ROW_FORMAT=COMPRESSED only block @param dpage destination control block */ -static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) +static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) noexcept { const page_id_t id{bpage->id()}; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); ut_ad(!bpage->frame); mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(mach_read_from_4(bpage->zip.data + FIL_PAGE_OFFSET) == id.page_no()); ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); ut_ad(bpage == buf_pool.page_hash.get(id, chain)); ut_ad(!buf_pool.watch_is_sentinel(*bpage)); @@ -2274,6 +2105,7 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) ut_ad(state <= buf_page_t::READ_FIX); ut_ad(bpage->lock.is_write_locked()); const auto frame= dpage->frame; + ut_ad(frame == reinterpret_cast(dpage)->frame_address()); dpage->lock.free(); new (dpage) buf_page_t(*bpage); @@ -2345,7 +2177,6 @@ got_block: ut_ad(w->access_time == 0); ut_ad(!w->oldest_modification()); ut_ad(!w->zip.data); - ut_ad(!w->in_zip_hash); static_assert(buf_page_t::NOT_USED == 0, "efficiency"); if (ut_d(auto s=) w->state()) { @@ -2625,6 +2456,8 @@ bool buf_zip_decompress(buf_block_t *block, bool check) noexcept ut_ad(block->zip_size()); ut_a(block->page.id().space() != 0); + ut_ad(mach_read_from_4(frame + FIL_PAGE_OFFSET) + == block->page.id().page_no()); if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) { @@ -2863,7 +2696,6 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id, if (b && !watch_is_sentinel(*b)) { uint32_t state= b->fix() + 1; - ut_ad(!b->in_zip_hash); hash_lock.unlock_shared(); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) @@ -2893,7 +2725,8 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id, return reinterpret_cast(-1); } - if (UNIV_LIKELY(b->frame != nullptr)); + if (UNIV_LIKELY(b->frame != nullptr)) + ut_ad(b->frame==reinterpret_cast(b)->frame_address()); else if (state < buf_page_t::READ_FIX) goto unzip; else @@ -2959,6 +2792,49 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id, } } +TRANSACTIONAL_TARGET +uint32_t buf_pool_t::page_guess(buf_block_t *b, page_hash_latch &latch, + const page_id_t id) noexcept +{ + transactional_shared_lock_guard g{latch}; +#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* shrunk() and my_virtual_mem_decommit() could retain the original + contents of the virtual memory range or zero it out immediately or + with a delay. Any zeroing out may lead to a false positive for + b->page.id() == id but never for b->page.state(). At the time of + the shrunk() call, shrink() and buf_LRU_block_free_non_file_page() + should guarantee that b->page.state() is equal to + buf_page_t::NOT_USED (0) for all to-be-freed blocks. */ +#else + /* shrunk() made the memory inaccessible. */ + if (UNIV_UNLIKELY(reinterpret_cast(b) >= memory + size_in_bytes)) + return 0; +#endif + const page_id_t block_id{b->page.id()}; +#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* shrunk() may have invoked MEM_UNDEFINED() on this memory to be able + to catch any unintended access elsewhere in our code. */ + MEM_MAKE_DEFINED(&block_id, sizeof block_id); +#endif + + if (id == block_id) + { + uint32_t state= b->page.state(); +#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* shrunk() may have invoked MEM_UNDEFINED() on this memory to be able + to catch any unintended access elsewhere in our code. */ + MEM_MAKE_DEFINED(&state, sizeof state); +#endif + /* Ignore guesses that point to read-fixed blocks. We can only + avoid a race condition by looking up the block via page_hash. */ + if ((state >= buf_page_t::FREED && state < buf_page_t::READ_FIX) || + state >= buf_page_t::WRITE_FIX) + return b->page.fix(); + ut_ad(b->page.frame); + } + return 0; +} + /** Low level function used to get access to a database page. @param[in] page_id page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @@ -3023,22 +2899,9 @@ loop: buf_block_t* block = guess; uint32_t state; - if (block) { - transactional_shared_lock_guard g{hash_lock}; - if (buf_pool.is_uncompressed(block) - && page_id == block->page.id()) { - ut_ad(!block->page.in_zip_hash); - state = block->page.state(); - /* Ignore guesses that point to read-fixed blocks. - We can only avoid a race condition by - looking up the block via buf_pool.page_hash. */ - if ((state >= buf_page_t::FREED - && state < buf_page_t::READ_FIX) - || state >= buf_page_t::WRITE_FIX) { - state = block->page.fix(); - goto got_block; - } - } + if (block + && (state = buf_pool.page_guess(block, hash_lock, page_id))) { + goto got_block; } guess = nullptr; @@ -3108,7 +2971,6 @@ loop: goto loop; got_block: - ut_ad(!block->page.in_zip_hash); state++; got_block_fixed: ut_ad(state > buf_page_t::FREED); @@ -3313,6 +3175,7 @@ wait_for_unzip: btr_search_drop_page_hash_index(block, true); #endif /* BTR_CUR_HASH_ADAPT */ + ut_ad(block->page.frame == block->frame_address()); ut_ad(page_id_t(page_get_space_id(block->page.frame), page_get_page_no(block->page.frame)) == page_id); return block; @@ -3418,21 +3281,19 @@ buf_page_get_gen( return block; } -TRANSACTIONAL_TARGET buf_block_t *buf_page_optimistic_fix(buf_block_t *block, page_id_t id) noexcept { buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); - transactional_shared_lock_guard g - {buf_pool.page_hash.lock_get(chain)}; - if (UNIV_UNLIKELY(!buf_pool.is_uncompressed(block) || - id != block->page.id() || !block->page.frame)) - return nullptr; - const auto state= block->page.state(); - if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED || - state >= buf_page_t::READ_FIX)) - return nullptr; - block->page.fix(); - return block; + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + if (uint32_t state= buf_pool.page_guess(block, hash_lock, id)) + { + if (UNIV_LIKELY(state >= buf_page_t::UNFIXED)) + return block; + else + /* Refuse access to pages that are marked as freed in the data file. */ + block->page.unfix(); + } + return nullptr; } buf_block_t *buf_page_optimistic_get(buf_block_t *block, @@ -3547,6 +3408,7 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size, { ut_ad(!page.in_file()); buf_block_init_low(this); + page.lock.init(); page.init(fix, page_id); page.set_os_used(); page_zip_set_size(&page.zip, zip_size); @@ -3635,6 +3497,7 @@ retry: { mysql_mutex_unlock(&buf_pool.mutex); buf_block_t *block= reinterpret_cast(bpage); + ut_ad(bpage->frame == block->frame_address()); mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); #ifdef BTR_CUR_HASH_ADAPT drop_hash_entry= block->index; @@ -3670,7 +3533,8 @@ retry: else { mysql_mutex_unlock(&buf_pool.mutex); - ut_ad(bpage->frame); + ut_ad(bpage->frame == + reinterpret_cast(bpage)->frame_address()); #ifdef BTR_CUR_HASH_ADAPT ut_ad(!reinterpret_cast(bpage)->index); #endif @@ -4127,6 +3991,61 @@ success_page: return DB_SUCCESS; } +#ifdef BTR_CUR_HASH_ADAPT +/** Clear the adaptive hash index on all pages in the buffer pool. */ +ATTRIBUTE_COLD void buf_pool_t::clear_hash_index() noexcept +{ + std::set garbage; + + mysql_mutex_lock(&mutex); + ut_ad(!btr_search_enabled); + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + dict_index_t *index= block->index; + assert_block_ahi_valid(block); + + /* We can clear block->index and block->n_pointers when + holding all AHI latches exclusively; see the comments in buf0buf.h */ + + if (!index) + { +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + ut_a(!block->n_pointers); +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + continue; + } + + ut_d(const auto s= block->page.state()); + /* Another thread may have set the state to + REMOVE_HASH in buf_LRU_block_remove_hashed(). + + The state change in buf_pool_t::resize() is not observable + here, because in that case we would have !block->index. + + In the end, the entire adaptive hash index will be removed. */ + ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers= 0; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + if (index->freed()) + garbage.insert(index); + block->index= nullptr; + } + + mysql_mutex_unlock(&mutex); + + for (dict_index_t *index : garbage) + btr_search_lazy_free(index); +} +#endif /* BTR_CUR_HASH_ADAPT */ + #ifdef UNIV_DEBUG /** Check that all blocks are in a replaceable state. @return address of a non-free block @@ -4134,10 +4053,44 @@ success_page: void buf_pool_t::assert_all_freed() noexcept { mysql_mutex_lock(&mutex); - const chunk_t *chunk= chunks; - for (auto i= n_chunks; i--; chunk++) - if (const buf_block_t* block= chunk->not_freed()) - ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + if (!block->page.in_file()) + continue; + switch (const lsn_t lsn= block->page.oldest_modification()) { + case 0: + case 1: + break; + + case 2: + ut_ad(fsp_is_system_temporary(block->page.id().space())); + break; + + default: + if (srv_read_only_mode) + { + /* The page cleaner is disabled in read-only mode. No pages + can be dirtied, so all of them must be clean. */ + ut_ad(lsn == recv_sys.lsn || + srv_force_recovery == SRV_FORCE_NO_LOG_REDO); + break; + } + + goto fixed_or_dirty; + } + + if (!block->page.can_relocate()) + fixed_or_dirty: + ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; + } + mysql_mutex_unlock(&mutex); } #endif /* UNIV_DEBUG */ @@ -4187,40 +4140,35 @@ void buf_pool_t::validate() noexcept mysql_mutex_lock(&mutex); - chunk_t* chunk = chunks; - /* Check the uncompressed blocks. */ - for (auto i = n_chunks; i--; chunk++) { - buf_block_t* block = chunk->blocks; + for (ulint i = 0; i < n_blocks; i++) { + const buf_block_t* block = get_nth_page(i); + ut_ad(block->page.frame == block->frame_address()); - for (auto j = chunk->size; j--; block++) { - ut_ad(block->page.frame); - switch (const auto f = block->page.state()) { - case buf_page_t::NOT_USED: - n_free++; + switch (const auto f = block->page.state()) { + case buf_page_t::NOT_USED: + ut_ad(!block->page.in_LRU_list); + n_free++; + break; + case buf_page_t::MEMORY: + case buf_page_t::REMOVE_HASH: + /* do nothing */ + break; + default: + if (f >= buf_page_t::READ_FIX + && f < buf_page_t::WRITE_FIX) { + /* A read-fixed block is not + necessarily in the page_hash yet. */ break; - - case buf_page_t::MEMORY: - case buf_page_t::REMOVE_HASH: - /* do nothing */ - break; - - default: - if (f >= buf_page_t::READ_FIX - && f < buf_page_t::WRITE_FIX) { - /* A read-fixed block is not - necessarily in the page_hash yet. */ - break; - } - ut_ad(f >= buf_page_t::FREED); - const page_id_t id{block->page.id()}; - ut_ad(page_hash.get( - id, - page_hash.cell_get(id.fold())) - == &block->page); - n_lru++; } + ut_ad(f >= buf_page_t::FREED); + const page_id_t id{block->page.id()}; + ut_ad(page_hash.get( + id, + page_hash.cell_get(id.fold())) + == &block->page); + n_lru++; } } @@ -4245,24 +4193,11 @@ void buf_pool_t::validate() noexcept ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing); mysql_mutex_unlock(&flush_list_mutex); - - if (n_chunks_new == n_chunks - && n_lru + n_free > curr_size + n_zip) { - - ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free - << ", pool " << curr_size - << " zip " << n_zip << ". Aborting..."; - } - + ut_ad(n_lru + n_free <= n_blocks + n_zip); ut_ad(UT_LIST_GET_LEN(LRU) >= n_lru); - - if (n_chunks_new == n_chunks - && UT_LIST_GET_LEN(free) != n_free) { - - ib::fatal() << "Free list len " - << UT_LIST_GET_LEN(free) - << ", free blocks " << n_free << ". Aborting..."; - } + ut_ad(UT_LIST_GET_LEN(free) <= n_free); + ut_ad(size_in_bytes != size_in_bytes_requested + || UT_LIST_GET_LEN(free) == n_free); mysql_mutex_unlock(&mutex); @@ -4277,26 +4212,23 @@ void buf_pool_t::print() noexcept { index_id_t* index_ids; ulint* counts; - ulint size; ulint i; - ulint j; index_id_t id; ulint n_found; - chunk_t* chunk; dict_index_t* index; - size = curr_size; + mysql_mutex_lock(&mutex); index_ids = static_cast( - ut_malloc_nokey(size * sizeof *index_ids)); + ut_malloc_nokey(n_blocks * sizeof *index_ids)); - counts = static_cast(ut_malloc_nokey(sizeof(ulint) * size)); + counts = static_cast( + ut_malloc_nokey(sizeof(ulint) * n_blocks)); - mysql_mutex_lock(&mutex); mysql_mutex_lock(&flush_list_mutex); ib::info() - << "[buffer pool: size=" << curr_size + << "[buffer pool: size=" << n_blocks << ", database pages=" << UT_LIST_GET_LEN(LRU) << ", free pages=" << UT_LIST_GET_LEN(free) << ", modified database pages=" @@ -4316,38 +4248,28 @@ void buf_pool_t::print() noexcept n_found = 0; - chunk = chunks; + for (size_t i = 0; i < n_blocks; i++) { + buf_block_t* block = get_nth_page(i); + const buf_frame_t* frame = block->page.frame; + ut_ad(frame == block->frame_address()); - for (i = n_chunks; i--; chunk++) { - buf_block_t* block = chunk->blocks; - ulint n_blocks = chunk->size; + if (fil_page_index_page_check(frame)) { - for (; n_blocks--; block++) { - const buf_frame_t* frame = block->page.frame; + id = btr_page_get_index_id(frame); - if (fil_page_index_page_check(frame)) { - - id = btr_page_get_index_id(frame); - - /* Look for the id in the index_ids array */ - j = 0; - - while (j < n_found) { - - if (index_ids[j] == id) { - counts[j]++; - - break; - } - j++; - } - - if (j == n_found) { - n_found++; - index_ids[j] = id; - counts[j] = 1; + /* Look for the id in the index_ids array */ + for (ulint j = 0; j < n_found; j++) { + if (index_ids[j] == id) { + counts[j]++; + goto found; } } + + index_ids[n_found] = id; + counts[n_found] = 1; + n_found++; +found: + continue; } } @@ -4381,138 +4303,78 @@ ulint buf_get_latched_pages_number() noexcept { ulint fixed_pages_number= 0; - mysql_mutex_lock(&buf_pool.mutex); + mysql_mutex_assert_owner(&buf_pool.mutex); for (buf_page_t *b= UT_LIST_GET_FIRST(buf_pool.LRU); b; b= UT_LIST_GET_NEXT(LRU, b)) if (b->state() > buf_page_t::UNFIXED) fixed_pages_number++; - mysql_mutex_unlock(&buf_pool.mutex); - return fixed_pages_number; } #endif /* UNIV_DEBUG */ -/** Collect buffer pool metadata. -@param[out] pool_info buffer pool metadata */ -void buf_stats_get_pool_info(buf_pool_info_t *pool_info) noexcept +void buf_pool_t::get_info(buf_pool_info_t *pool_info) noexcept { - time_t current_time; - double time_elapsed; + mysql_mutex_lock(&mutex); + pool_info->pool_size= curr_size(); + pool_info->lru_len= UT_LIST_GET_LEN(LRU); + pool_info->old_lru_len= LRU_old_len; + pool_info->free_list_len= UT_LIST_GET_LEN(free); - mysql_mutex_lock(&buf_pool.mutex); + mysql_mutex_lock(&flush_list_mutex); + pool_info->flush_list_len= UT_LIST_GET_LEN(flush_list); + pool_info->n_pend_unzip= UT_LIST_GET_LEN(unzip_LRU); + pool_info->n_pend_reads= os_aio_pending_reads_approx(); + pool_info->n_pending_flush_lru= n_flush(); + pool_info->n_pending_flush_list= os_aio_pending_writes(); + mysql_mutex_unlock(&flush_list_mutex); - pool_info->pool_size = buf_pool.curr_size; + double elapsed= 0.001 + difftime(time(nullptr), last_printout_time); - pool_info->lru_len = UT_LIST_GET_LEN(buf_pool.LRU); - - pool_info->old_lru_len = buf_pool.LRU_old_len; - - pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool.free); - - mysql_mutex_lock(&buf_pool.flush_list_mutex); - pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool.flush_list); - - pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU); - - pool_info->n_pend_reads = os_aio_pending_reads_approx(); - - pool_info->n_pending_flush_lru = buf_pool.n_flush(); - - pool_info->n_pending_flush_list = os_aio_pending_writes(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - - current_time = time(NULL); - time_elapsed = 0.001 + difftime(current_time, - buf_pool.last_printout_time); - - pool_info->n_pages_made_young = buf_pool.stat.n_pages_made_young; - - pool_info->n_pages_not_made_young = - buf_pool.stat.n_pages_not_made_young; - - pool_info->n_pages_read = buf_pool.stat.n_pages_read; - - pool_info->n_pages_created = buf_pool.stat.n_pages_created; - - pool_info->n_pages_written = buf_pool.stat.n_pages_written; - - pool_info->n_page_gets = buf_pool.stat.n_page_gets; - - pool_info->n_ra_pages_read_rnd = buf_pool.stat.n_ra_pages_read_rnd; - pool_info->n_ra_pages_read = buf_pool.stat.n_ra_pages_read; - - pool_info->n_ra_pages_evicted = buf_pool.stat.n_ra_pages_evicted; - - pool_info->page_made_young_rate = - static_cast(buf_pool.stat.n_pages_made_young - - buf_pool.old_stat.n_pages_made_young) - / time_elapsed; - - pool_info->page_not_made_young_rate = - static_cast(buf_pool.stat.n_pages_not_made_young - - buf_pool.old_stat.n_pages_not_made_young) - / time_elapsed; - - pool_info->pages_read_rate = - static_cast(buf_pool.stat.n_pages_read - - buf_pool.old_stat.n_pages_read) - / time_elapsed; - - pool_info->pages_created_rate = - static_cast(buf_pool.stat.n_pages_created - - buf_pool.old_stat.n_pages_created) - / time_elapsed; - - pool_info->pages_written_rate = - static_cast(buf_pool.stat.n_pages_written - - buf_pool.old_stat.n_pages_written) - / time_elapsed; - - pool_info->n_page_get_delta = buf_pool.stat.n_page_gets - - buf_pool.old_stat.n_page_gets; - - if (pool_info->n_page_get_delta) { - pool_info->page_read_delta = buf_pool.stat.n_pages_read - - buf_pool.old_stat.n_pages_read; - - pool_info->young_making_delta = - buf_pool.stat.n_pages_made_young - - buf_pool.old_stat.n_pages_made_young; - - pool_info->not_young_making_delta = - buf_pool.stat.n_pages_not_made_young - - buf_pool.old_stat.n_pages_not_made_young; - } - pool_info->pages_readahead_rnd_rate = - static_cast(buf_pool.stat.n_ra_pages_read_rnd - - buf_pool.old_stat.n_ra_pages_read_rnd) - / time_elapsed; - - - pool_info->pages_readahead_rate = - static_cast(buf_pool.stat.n_ra_pages_read - - buf_pool.old_stat.n_ra_pages_read) - / time_elapsed; - - pool_info->pages_evicted_rate = - static_cast(buf_pool.stat.n_ra_pages_evicted - - buf_pool.old_stat.n_ra_pages_evicted) - / time_elapsed; - - pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool.unzip_LRU); - - pool_info->io_sum = buf_LRU_stat_sum.io; - - pool_info->io_cur = buf_LRU_stat_cur.io; - - pool_info->unzip_sum = buf_LRU_stat_sum.unzip; - - pool_info->unzip_cur = buf_LRU_stat_cur.unzip; - - buf_refresh_io_stats(); - mysql_mutex_unlock(&buf_pool.mutex); + pool_info->n_pages_made_young= stat.n_pages_made_young; + pool_info->page_made_young_rate= + double(stat.n_pages_made_young - old_stat.n_pages_made_young) / + elapsed; + pool_info->n_pages_not_made_young= stat.n_pages_not_made_young; + pool_info->page_not_made_young_rate= + double(stat.n_pages_not_made_young - old_stat.n_pages_not_made_young) / + elapsed; + pool_info->n_pages_read= stat.n_pages_read; + pool_info->pages_read_rate= + double(stat.n_pages_read - old_stat.n_pages_read) / elapsed; + pool_info->n_pages_created= stat.n_pages_created; + pool_info->pages_created_rate= + double(stat.n_pages_created - old_stat.n_pages_created) / elapsed; + pool_info->n_pages_written= stat.n_pages_written; + pool_info->pages_written_rate= + double(stat.n_pages_written - old_stat.n_pages_written) / elapsed; + pool_info->n_page_gets= stat.n_page_gets; + pool_info->n_page_get_delta= stat.n_page_gets - old_stat.n_page_gets; + if (pool_info->n_page_get_delta) + { + pool_info->page_read_delta= stat.n_pages_read - old_stat.n_pages_read; + pool_info->young_making_delta= + stat.n_pages_made_young - old_stat.n_pages_made_young; + pool_info->not_young_making_delta= + stat.n_pages_not_made_young - old_stat.n_pages_not_made_young; + } + pool_info->n_ra_pages_read_rnd= stat.n_ra_pages_read_rnd; + pool_info->pages_readahead_rnd_rate= + double(stat.n_ra_pages_read_rnd - old_stat.n_ra_pages_read_rnd) / elapsed; + pool_info->n_ra_pages_read= stat.n_ra_pages_read; + pool_info->pages_readahead_rate= + double(stat.n_ra_pages_read - old_stat.n_ra_pages_read) / elapsed; + pool_info->n_ra_pages_evicted= stat.n_ra_pages_evicted; + pool_info->pages_evicted_rate= + double(stat.n_ra_pages_evicted - old_stat.n_ra_pages_evicted) / elapsed; + pool_info->unzip_lru_len= UT_LIST_GET_LEN(unzip_LRU); + pool_info->io_sum= buf_LRU_stat_sum.io; + pool_info->io_cur= buf_LRU_stat_cur.io; + pool_info->unzip_sum= buf_LRU_stat_sum.unzip; + pool_info->unzip_cur= buf_LRU_stat_cur.unzip; + buf_refresh_io_stats(); + mysql_mutex_unlock(&mutex); } /*********************************************************************//** @@ -4620,7 +4482,7 @@ buf_print_io( { buf_pool_info_t pool_info; - buf_stats_get_pool_info(&pool_info); + buf_pool.get_info(&pool_info); buf_print_io_instance(&pool_info, file); } diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc index fa239e7d5cc..ff46074e8d0 100644 --- a/storage/innobase/buf/buf0dump.cc +++ b/storage/innobase/buf/buf0dump.cc @@ -58,7 +58,7 @@ take after being waked up. */ static volatile bool buf_dump_should_start; static volatile bool buf_load_should_start; -static bool buf_load_abort_flag; +static Atomic_relaxed buf_load_abort_flag; /** Start the buffer pool dump/load task and instructs it to start a dump. */ void buf_dump_start() @@ -295,7 +295,7 @@ buf_dump( /* limit the number of total pages dumped to X% of the total number of pages */ - t_pages = buf_pool.curr_size * srv_buf_pool_dump_pct / 100; + t_pages = buf_pool.curr_size() * srv_buf_pool_dump_pct / 100; if (n_pages > t_pages) { buf_dump_status(STATUS_INFO, "Restricted to " ULINTPF @@ -477,10 +477,10 @@ buf_load() return; } - /* If dump is larger than the buffer pool(s), then we ignore the + /* If the dump is larger than the buffer pool, then we ignore the extra trailing. This could happen if a dump is made, then buffer pool is shrunk and then load is attempted. */ - dump_n = std::min(dump_n, buf_pool.get_n_pages()); + dump_n = std::min(dump_n, buf_pool.curr_size()); if (dump_n != 0) { dump = static_cast(ut_malloc_nokey( diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 1a3368c9f94..c019786867b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -281,6 +281,8 @@ void buf_page_t::write_complete(bool persistent, bool error, uint32_t state) { ut_ad(!persistent == fsp_is_system_temporary(id().space())); ut_ad(state >= WRITE_FIX); + ut_ad(!frame || + frame == reinterpret_cast(this)->frame_address()); if (UNIV_LIKELY(!error)) { @@ -795,6 +797,7 @@ bool buf_page_t::flush(fil_space_t *space) noexcept size_t orig_size; #endif buf_tmp_buffer_t *slot= nullptr; + byte *page= frame; if (UNIV_UNLIKELY(!frame)) /* ROW_FORMAT=COMPRESSED */ { @@ -810,7 +813,6 @@ bool buf_page_t::flush(fil_space_t *space) noexcept } else { - byte *page= frame; size= block->physical_size(); #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 orig_size= size; @@ -891,7 +893,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, : space.physical_size() == 1024 ? 3 : 0)); /* When flushed, dirty blocks are searched in neighborhoods of this size, and flushed along with the original page. */ - const ulint s= buf_pool.curr_size / 16; + const ulint s= buf_pool.curr_size() / 16; const uint32_t read_ahead= buf_pool.read_ahead_area; const uint32_t buf_flush_area= read_ahead > s ? static_cast(s) : read_ahead; @@ -1211,16 +1213,15 @@ static void buf_flush_discard_page(buf_page_t *bpage) noexcept /** Flush dirty blocks from the end buf_pool.LRU, and move clean blocks to buf_pool.free. -@param max maximum number of blocks to flush -@param n counts of flushed and evicted pages */ -static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept +@param max maximum number of blocks to flush +@param n counts of flushed and evicted pages +@param to_withdraw buf_pool.to_withdraw() */ +static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n, + size_t to_withdraw) noexcept { - ulint scanned= 0; + size_t scanned= 0; mysql_mutex_assert_owner(&buf_pool.mutex); - ulint free_limit{buf_pool.LRU_scan_depth}; - if (buf_pool.withdraw_target && buf_pool.is_shrinking()) - free_limit+= buf_pool.withdraw_target - UT_LIST_GET_LEN(buf_pool.withdraw); - + size_t free_limit{buf_pool.LRU_scan_depth + to_withdraw}; const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN ? 0 : buf_pool.flush_neighbors; fil_space_t *space= nullptr; @@ -1237,8 +1238,8 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept pages. To avoid such hang, we adjust the LRU limit lower than the limit for data objects as checked in buf_LRU_check_size_of_non_data_objects() i.e. one page less than 5% of BP. */ - size_t pool_limit= buf_pool.curr_size / 20 - 1; - auto buf_lru_min_len= std::min(pool_limit, BUF_LRU_MIN_LEN); + const size_t buf_lru_min_len= + std::min((buf_pool.usable_size()) / 20 - 1, size_t{BUF_LRU_MIN_LEN}); for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU); bpage && @@ -1372,11 +1373,12 @@ Whether LRU or unzip_LRU is used depends on the state of the system. @param n counts of flushed and evicted pages */ static void buf_do_LRU_batch(ulint max, flush_counters_t *n) noexcept { - if (buf_LRU_evict_from_unzip_LRU()) + const size_t to_withdraw= buf_pool.to_withdraw(); + if (!to_withdraw && buf_LRU_evict_from_unzip_LRU()) buf_free_from_unzip_LRU_list_batch(); n->evicted= 0; n->flushed= 0; - buf_flush_LRU_list_batch(max, n); + buf_flush_LRU_list_batch(max, n, to_withdraw); mysql_mutex_assert_owner(&buf_pool.mutex); buf_lru_freed_page_count+= n->evicted; @@ -2422,6 +2424,13 @@ func_exit: goto func_exit; } +TPOOL_SUPPRESS_TSAN +bool buf_pool_t::running_out() const noexcept +{ + return !recv_recovery_is_on() && + UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < n_blocks / 4; +} + TPOOL_SUPPRESS_TSAN bool buf_pool_t::need_LRU_eviction() const noexcept { @@ -2481,7 +2490,7 @@ static void buf_flush_page_cleaner() noexcept (!UT_LIST_GET_LEN(buf_pool.flush_list) || srv_max_dirty_pages_pct_lwm == 0.0)) { - buf_pool.LRU_warned.clear(std::memory_order_release); + buf_pool.LRU_warned_clear(); /* We are idle; wait for buf_pool.page_cleaner_wakeup() */ my_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex); @@ -2556,6 +2565,7 @@ static void buf_flush_page_cleaner() noexcept buf_pool.n_flush_inc(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); n= srv_max_io_capacity; + os_aio_wait_until_no_pending_writes(false); mysql_mutex_lock(&buf_pool.mutex); LRU_flush: n= buf_flush_LRU(n); @@ -2659,7 +2669,10 @@ static void buf_flush_page_cleaner() noexcept !buf_pool.need_LRU_eviction()) goto check_oldest_and_set_idle; else + { mysql_mutex_lock(&buf_pool.mutex); + os_aio_wait_until_no_pending_writes(false); + } n= srv_max_io_capacity; n= n >= n_flushed ? n - n_flushed : 0; @@ -2700,11 +2713,14 @@ ATTRIBUTE_COLD void buf_pool_t::LRU_warn() noexcept { mysql_mutex_assert_owner(&mutex); try_LRU_scan= false; - if (!LRU_warned.test_and_set(std::memory_order_acquire)) + if (!LRU_warned) + { + LRU_warned= true; sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" " %zu blocks are in use and %zu free." " Consider increasing innodb_buffer_pool_size.", UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free)); + } } /** Initialize page_cleaner. */ diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index b3315dabc67..1f5d6eab259 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -38,6 +38,7 @@ Created 11/5/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0mon.h" #include "my_cpu.h" +#include "log.h" /** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV @@ -133,7 +134,7 @@ static inline void incr_LRU_size_in_bytes(const buf_page_t* bpage) buf_pool.stat.LRU_bytes += bpage->physical_size(); - ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size); + ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size()); } /** @return whether the unzip_LRU list should be used for evicting a victim @@ -259,89 +260,52 @@ static bool buf_LRU_free_from_common_LRU_list(ulint limit) return(freed); } -/** @return a buffer block from the buf_pool.free list -@retval NULL if the free list is empty */ -buf_block_t* buf_LRU_get_free_only() -{ - buf_block_t* block; - - mysql_mutex_assert_owner(&buf_pool.mutex); - - block = reinterpret_cast( - UT_LIST_GET_FIRST(buf_pool.free)); - - while (block != NULL) { - ut_ad(block->page.in_free_list); - ut_d(block->page.in_free_list = FALSE); - ut_ad(!block->page.oldest_modification()); - ut_ad(!block->page.in_LRU_list); - ut_a(!block->page.in_file()); - UT_LIST_REMOVE(buf_pool.free, &block->page); - - if (!buf_pool.is_shrinking() - || UT_LIST_GET_LEN(buf_pool.withdraw) - >= buf_pool.withdraw_target - || !buf_pool.will_be_withdrawn(block->page)) { - /* No adaptive hash index entries may point to - a free block. */ - assert_block_ahi_empty(block); - - block->page.set_state(buf_page_t::MEMORY); - block->page.set_os_used(); - break; - } - - /* This should be withdrawn */ - UT_LIST_ADD_LAST(buf_pool.withdraw, &block->page); - ut_d(block->in_withdraw_list = true); - - block = reinterpret_cast( - UT_LIST_GET_FIRST(buf_pool.free)); - } - - return(block); -} - /******************************************************************//** Checks how much of buf_pool is occupied by non-data objects like AHI, lock heaps etc. Depending on the size of non-data objects this function will either assert or issue a warning and switch on the status monitor. */ -static void buf_LRU_check_size_of_non_data_objects() +static void buf_LRU_check_size_of_non_data_objects() noexcept { mysql_mutex_assert_owner(&buf_pool.mutex); - if (recv_recovery_is_on() || buf_pool.n_chunks_new != buf_pool.n_chunks) + if (recv_recovery_is_on()) return; - const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU); + const size_t curr_size{buf_pool.usable_size()}; - if (s < buf_pool.curr_size / 20) - ib::fatal() << "Over 95 percent of the buffer pool is" - " occupied by lock heaps" + auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU); + + if (s < curr_size / 20) + { + sql_print_error("[FATAL] InnoDB: Over 95 percent of the buffer pool is" + " occupied by lock heaps" #ifdef BTR_CUR_HASH_ADAPT - " or the adaptive hash index" + " or the adaptive hash index" #endif /* BTR_CUR_HASH_ADAPT */ - "! Check that your transactions do not set too many" - " row locks, or review if innodb_buffer_pool_size=" - << (buf_pool.curr_size >> (20U - srv_page_size_shift)) - << "M could be bigger."; + "! Check that your transactions do not set too many" + " row locks, or review if innodb_buffer_pool_size=%zuM" + " could be bigger", + curr_size >> (20 - srv_page_size_shift)); + abort(); + } - if (s < buf_pool.curr_size / 3) + if (s < curr_size / 3) { if (!buf_lru_switched_on_innodb_mon && srv_monitor_timer) { /* Over 67 % of the buffer pool is occupied by lock heaps or the adaptive hash index. This may be a memory leak! */ - ib::warn() << "Over 67 percent of the buffer pool is" - " occupied by lock heaps" + sql_print_warning("InnoDB: Over 67 percent of the buffer pool is" + " occupied by lock heaps" #ifdef BTR_CUR_HASH_ADAPT - " or the adaptive hash index" + " or the adaptive hash index" #endif /* BTR_CUR_HASH_ADAPT */ - "! Check that your transactions do not set too many row locks." - " innodb_buffer_pool_size=" - << (buf_pool.curr_size >> (20U - srv_page_size_shift)) - << "M. Starting the InnoDB Monitor to print diagnostics."; + "! Check that your transactions do not set too many" + " row locks. innodb_buffer_pool_size=%zuM." + " Starting the InnoDB Monitor to print diagnostics.", + curr_size >> (20 - srv_page_size_shift)); + buf_lru_switched_on_innodb_mon= true; srv_print_innodb_monitor= TRUE; srv_monitor_timer_schedule_now(); @@ -389,15 +353,15 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) retry: /* If there is a block in the free list, take it */ - block= buf_LRU_get_free_only(); + block= buf_pool.allocate(); if (block) { got_block: const ulint LRU_size= UT_LIST_GET_LEN(buf_pool.LRU); const ulint available= UT_LIST_GET_LEN(buf_pool.free); - const ulint scan_depth= buf_pool.LRU_scan_depth / 2; - ut_ad(LRU_size <= BUF_LRU_MIN_LEN || - available >= scan_depth || buf_pool.need_LRU_eviction()); + const size_t scan_depth{buf_pool.LRU_scan_depth / 2}; + ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth || + buf_pool.is_shrinking() || buf_pool.need_LRU_eviction()); ut_d(bool signalled = false); @@ -446,7 +410,7 @@ got_block: waited= true; - while (!(block= buf_LRU_get_free_only())) + while (!(block= buf_pool.allocate())) { buf_pool.stat.LRU_waits++; @@ -811,10 +775,10 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) if (zip || !bpage->zip.data || !bpage->frame) { break; } + mysql_mutex_lock(&buf_pool.flush_list_mutex); relocate_compressed: b = static_cast(ut_zalloc_nokey(sizeof *b)); ut_a(b); - mysql_mutex_lock(&buf_pool.flush_list_mutex); new (b) buf_page_t(*bpage); b->frame = nullptr; { @@ -833,7 +797,12 @@ func_exit: hash_lock.unlock(); return(false); } - goto relocate_compressed; + mysql_mutex_lock(&buf_pool.flush_list_mutex); + if (bpage->can_relocate()) { + goto relocate_compressed; + } + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + goto func_exit; } mysql_mutex_assert_owner(&buf_pool.mutex); @@ -872,7 +841,6 @@ func_exit: /* The fields of bpage were copied to b before buf_LRU_block_remove_hashed() was invoked. */ - ut_ad(!b->in_zip_hash); ut_ad(b->in_LRU_list); ut_ad(b->in_page_hash); ut_d(b->in_page_hash = false); @@ -988,24 +956,12 @@ buf_LRU_block_free_non_file_page( if (data != NULL) { block->page.zip.data = NULL; - buf_pool_mutex_exit_forbid(); - ut_ad(block->zip_size()); - buf_buddy_free(data, block->zip_size()); - - buf_pool_mutex_exit_allow(); page_zip_set_size(&block->page.zip, 0); } - if (buf_pool.is_shrinking() - && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target - && buf_pool.will_be_withdrawn(block->page)) { - /* This should be withdrawn */ - UT_LIST_ADD_LAST( - buf_pool.withdraw, - &block->page); - ut_d(block->in_withdraw_list = true); + if (buf_pool.to_withdraw() && buf_pool.withdraw(block->page)) { } else { UT_LIST_ADD_FIRST(buf_pool.free, &block->page); ut_d(block->page.in_free_list = true); @@ -1106,7 +1062,6 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, MEM_CHECK_ADDRESSABLE(bpage->zip.data, bpage->zip_size()); } - ut_ad(!bpage->in_zip_hash); buf_pool.page_hash.remove(chain, bpage); page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); @@ -1118,11 +1073,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, ut_ad(!bpage->oldest_modification()); hash_lock.unlock(); - buf_pool_mutex_exit_forbid(); - buf_buddy_free(bpage->zip.data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(); bpage->lock.free(); ut_free(bpage); return false; @@ -1151,12 +1102,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, ut_ad(!bpage->in_free_list); ut_ad(!bpage->oldest_modification()); ut_ad(!bpage->in_LRU_list); - buf_pool_mutex_exit_forbid(); - buf_buddy_free(data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(); - page_zip_set_size(&bpage->zip, 0); } @@ -1327,7 +1273,7 @@ void buf_LRU_validate() ut_ad(!bpage->frame || reinterpret_cast(bpage) ->in_unzip_LRU_list - == bpage->belongs_to_unzip_LRU()); + == !!bpage->zip.data); if (bpage->is_old()) { const buf_page_t* prev diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 09c30504f52..bddd0739ce3 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -44,7 +44,7 @@ Created 11/5/1995 Heikki Tuuri #include "log.h" #include "mariadb_stats.h" -/** If there are buf_pool.curr_size per the number below pending reads, then +/** If there are buf_pool.curr_size() per the number below pending reads, then read-ahead is not done: this is to prevent flooding the buffer pool with i/o-fixed buffer blocks */ #define BUF_READ_AHEAD_PEND_LIMIT 2 @@ -63,7 +63,6 @@ inline uint32_t buf_pool_t::watch_remove(buf_page_t *w, ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked()); ut_ad(w >= &watch[0]); ut_ad(w < &watch[array_elements(watch)]); - ut_ad(!w->in_zip_hash); ut_ad(!w->zip.data); uint32_t s{w->state()}; @@ -372,7 +371,7 @@ ulint buf_read_ahead_random(const page_id_t page_id, bool ibuf) noexcept return 0; if (os_aio_pending_reads_approx() > - buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) + buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT) return 0; fil_space_t* space= fil_space_t::get(page_id.space()); @@ -525,7 +524,7 @@ ulint buf_read_ahead_linear(const page_id_t page_id, bool ibuf) noexcept return 0; if (os_aio_pending_reads_approx() > - buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) + buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT) return 0; const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 26177c701ad..9236e7e005d 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -930,7 +930,7 @@ void dict_sys_t::create() noexcept UT_LIST_INIT(table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(table_non_LRU, &dict_table_t::table_LRU); - const ulint hash_size = buf_pool_get_curr_size() + const ulint hash_size = buf_pool.curr_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); @@ -4393,7 +4393,7 @@ void dict_sys_t::resize() noexcept table_id_hash.free(); temp_id_hash.free(); - const ulint hash_size = buf_pool_get_curr_size() + const ulint hash_size = buf_pool.curr_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); table_id_hash.create(hash_size); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index b407ed3e4d4..58e40e41bdc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -45,6 +45,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include #include +#include #include #include #include @@ -169,13 +170,9 @@ static const long AUTOINC_OLD_STYLE_LOCKING = 0; static const long AUTOINC_NEW_STYLE_LOCKING = 1; static const long AUTOINC_NO_LOCKING = 2; -static constexpr size_t buf_pool_chunk_min_size= 1U << 20; - static ulong innobase_open_files; static long innobase_autoinc_lock_mode; -ulonglong innobase_buffer_pool_size; - /** Percentage of the buffer pool to reserve for 'old' blocks. Connected to buf_LRU_old_ratio. */ static uint innobase_old_blocks_pct; @@ -3678,53 +3675,25 @@ static int innodb_init_abort() DBUG_RETURN(1); } -/** Return the minimum buffer pool size based on page size */ -static inline ulint min_buffer_pool_size() +static void innodb_buffer_pool_size_update(THD* thd,st_mysql_sys_var*,void*, + const void *save) noexcept { - ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size; - /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */ - ulint alignment= 1U << 20; - return UT_CALC_ALIGN(s, alignment); + buf_pool.resize(*static_cast(save), thd); } -/** Validate the requested buffer pool size. Also, reserve the necessary -memory needed for buffer pool resize. -@param[in] thd thread handle -@param[in] var pointer to system variable -@param[out] save immediate result for update function -@param[in] value incoming string -@return 0 on success, 1 on failure. -*/ -static -int -innodb_buffer_pool_size_validate( - THD* thd, - struct st_mysql_sys_var* var, - void* save, - struct st_mysql_value* value); - -/** Update the system variable innodb_buffer_pool_size using the "saved" -value. This function is registered as a callback with MySQL. -@param[in] thd thread handle -@param[in] var pointer to system variable -@param[out] var_ptr where the formal string goes -@param[in] save immediate result from check function */ -static -void -innodb_buffer_pool_size_update( - THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); - -static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size, +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size, buf_pool.size_in_bytes_requested, PLUGIN_VAR_RQCMDARG, - "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - innodb_buffer_pool_size_validate, - innodb_buffer_pool_size_update, - 128ULL << 20, - 2ULL << 20, - LLONG_MAX, 1024*1024L); + "The size of the memory buffer InnoDB uses to cache data" + " and indexes of its tables.", + nullptr, innodb_buffer_pool_size_update, 128U << 20, 2U << 20, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), 1U << 20); + +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size_max, buf_pool.size_in_bytes_max, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Maximum innodb_buffer_pool_size", + nullptr, nullptr, 0, 0, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), + innodb_buffer_pool_extent_size); static MYSQL_SYSVAR_UINT(log_write_ahead_size, log_sys.write_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -3776,29 +3745,6 @@ static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id) return 0; } -/** Initialize and normalize innodb_buffer_pool_{chunk_,}size. */ -static void innodb_buffer_pool_size_init() -{ - if (srv_buf_pool_chunk_unit > srv_buf_pool_size) - { - /* Size unit of buffer pool is larger than srv_buf_pool_size. - adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */ - srv_buf_pool_chunk_unit = srv_buf_pool_size; - } - else if (srv_buf_pool_chunk_unit == 0) - { - srv_buf_pool_chunk_unit = srv_buf_pool_size / 64; - my_large_page_truncate(&srv_buf_pool_chunk_unit); - } - - if (srv_buf_pool_chunk_unit < buf_pool_chunk_min_size) - srv_buf_pool_chunk_unit = buf_pool_chunk_min_size; - - srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size); - innobase_buffer_pool_size = srv_buf_pool_size; -} - - static bool compression_algorithm_is_not_loaded(ulong compression_algorithm, myf flags) { @@ -3824,323 +3770,299 @@ compression_algorithm_is_not_loaded(ulong compression_algorithm, myf flags) @retval HA_ERR_INITIALIZATION when some parameters are out of range */ static int innodb_init_params() { - DBUG_ENTER("innodb_init_params"); + DBUG_ENTER("innodb_init_params"); - ulong num_pll_degree; + srv_page_size_shift= innodb_page_size_validate(srv_page_size); + if (!srv_page_size_shift) + { + sql_print_error("InnoDB: Invalid page size=%lu.\n", srv_page_size); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - /* Check that values don't overflow on 32-bit systems. */ - if (sizeof(ulint) == 4) { - if (innobase_buffer_pool_size > UINT_MAX32) { - sql_print_error( - "innodb_buffer_pool_size can't be over 4GB" - " on 32-bit systems"); - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - } + size_t &min= MYSQL_SYSVAR_NAME(buffer_pool_size).min_val; + min= ut_calc_align + (buf_pool.blocks_in_bytes(BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4), + 1U << 20); + size_t innodb_buffer_pool_size= buf_pool.size_in_bytes_requested; - /* The buffer pool needs to be able to accommodate enough many - pages, even for larger pages */ - MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size(); + /* With large pages, buffer pool can't grow or shrink. */ + if (!buf_pool.size_in_bytes_max || my_use_large_pages || + innodb_buffer_pool_size > buf_pool.size_in_bytes_max) + buf_pool.size_in_bytes_max= ut_calc_align(innodb_buffer_pool_size, + innodb_buffer_pool_extent_size); - if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) { - ib::error() << "innodb_page_size=" - << srv_page_size << " requires " - << "innodb_buffer_pool_size >= " - << (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20) - << "MiB current " << (innobase_buffer_pool_size >> 20) - << "MiB"; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + MYSQL_SYSVAR_NAME(buffer_pool_size).max_val= buf_pool.size_in_bytes_max; - if (!ut_is_2pow(log_sys.write_size)) { - sql_print_error("InnoDB: innodb_log_write_ahead_size=%u" - " is not a power of two", - log_sys.write_size); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (innodb_buffer_pool_size < min) + { + sql_print_error("InnoDB: innodb_page_size=%lu requires " + "innodb_buffer_pool_size >= %zu MiB current %zu MiB", + srv_page_size, min >> 20, innodb_buffer_pool_size >> 20); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (compression_algorithm_is_not_loaded(innodb_compression_algorithm, ME_ERROR_LOG)) - DBUG_RETURN(HA_ERR_INITIALIZATION); + if (!ut_is_2pow(log_sys.write_size)) + { + sql_print_error("InnoDB: innodb_log_write_ahead_size=%u" + " is not a power of two", + log_sys.write_size); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if ((srv_encrypt_tables || srv_encrypt_log - || innodb_encrypt_temporary_tables) - && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { - sql_print_error("InnoDB: cannot enable encryption, " - "encryption plugin is not available"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (compression_algorithm_is_not_loaded(innodb_compression_algorithm, ME_ERROR_LOG)) + DBUG_RETURN(HA_ERR_INITIALIZATION); + + if ((srv_encrypt_tables || srv_encrypt_log || + innodb_encrypt_temporary_tables) && + !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) + { + sql_print_error("InnoDB: cannot enable encryption, " + "encryption plugin is not available"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } #ifdef _WIN32 - if (!is_filename_allowed(srv_buf_dump_filename, - strlen(srv_buf_dump_filename), FALSE)) { - sql_print_error("InnoDB: innodb_buffer_pool_filename" - " cannot have colon (:) in the file name."); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (!is_filename_allowed(srv_buf_dump_filename, + strlen(srv_buf_dump_filename), false)) + { + sql_print_error("InnoDB: innodb_buffer_pool_filename" + " cannot have colon (:) in the file name."); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } #endif - /* First calculate the default path for innodb_data_home_dir etc., - in case the user has not given any value. + /* First calculate the default path for innodb_data_home_dir etc., + in case the user has not given any value. - Note that when using the embedded server, the datadirectory is not - necessarily the current directory of this program. */ + Note that when using the embedded server, the datadirectory is not + necessarily the current directory of this program. */ - fil_path_to_mysql_datadir = + fil_path_to_mysql_datadir = #ifndef HAVE_REPLICATION - mysqld_embedded ? mysql_real_data_home : + mysqld_embedded ? mysql_real_data_home : #endif - "./"; + "./"; - /* Set InnoDB initialization parameters according to the values - read from MySQL .cnf file */ + /* Set InnoDB initialization parameters according to the values + read from MySQL .cnf file */ - /* The default dir for data files is the datadir of MySQL */ + /* The default dir for data files is the datadir of MySQL */ - srv_data_home = innobase_data_home_dir - ? innobase_data_home_dir - : const_cast(fil_path_to_mysql_datadir); + srv_data_home= innobase_data_home_dir + ? innobase_data_home_dir + : const_cast(fil_path_to_mysql_datadir); #ifdef WITH_WSREP - /* If we use the wsrep API, then we need to tell the server - the path to the data files (for passing it to the SST scripts): */ - wsrep_set_data_home_dir(srv_data_home); + /* If we use the wsrep API, then we need to tell the server + the path to the data files (for passing it to the SST scripts): */ + wsrep_set_data_home_dir(srv_data_home); #endif /* WITH_WSREP */ - /*--------------- Shared tablespaces -------------------------*/ + /*--------------- Shared tablespaces -------------------------*/ - /* Check that the value of system variable innodb_page_size was - set correctly. Its value was put into srv_page_size. If valid, - return the associated srv_page_size_shift. */ - srv_page_size_shift = innodb_page_size_validate(srv_page_size); - if (!srv_page_size_shift) { - sql_print_error("InnoDB: Invalid page size=%lu.\n", - srv_page_size); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + /* Check that the value of system variable innodb_page_size was + set correctly. Its value was put into srv_page_size. If valid, + return the associated srv_page_size_shift. */ - srv_sys_space.set_space_id(TRX_SYS_SPACE); + srv_sys_space.set_space_id(TRX_SYS_SPACE); + /* Temporary tablespace is in full crc32 format. */ + srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER | + FSP_FLAGS_FCRC32_PAGE_SSIZE()); - switch (srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: - srv_sys_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); - break; - default: - srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); - } + switch (srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: + srv_sys_space.set_flags(srv_tmp_space.flags()); + break; + default: + srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); + } - srv_sys_space.set_path(srv_data_home); + srv_sys_space.set_path(srv_data_home); - /* Supports raw devices */ - if (!srv_sys_space.parse_params(innobase_data_file_path, true)) { - ib::error() << "Unable to parse innodb_data_file_path=" - << innobase_data_file_path; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (!srv_sys_space.parse_params(innobase_data_file_path, true)) + { + sql_print_error("InnoDB: Unable to parse innodb_data_file_path=%s", + innobase_data_file_path); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - srv_tmp_space.set_path(srv_data_home); + srv_tmp_space.set_path(srv_data_home); - /* Temporary tablespace is in full crc32 format. */ - srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); + if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) + { + sql_print_error("InnoDB: Unable to parse innodb_temp_data_file_path=%s", + innobase_temp_data_file_path); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) { - ib::error() << "Unable to parse innodb_temp_data_file_path=" - << innobase_temp_data_file_path; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + /* Perform all sanity check before we take action of deleting files*/ + if (srv_sys_space.intersection(&srv_tmp_space)) + { + sql_print_error("innodb_temporary and innodb_system" + " file names seem to be the same."); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - /* Perform all sanity check before we take action of deleting files*/ - if (srv_sys_space.intersection(&srv_tmp_space)) { - sql_print_error("innodb_temporary and innodb_system" - " file names seem to be the same."); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + srv_sys_space.normalize_size(); + srv_tmp_space.normalize_size(); - srv_sys_space.normalize_size(); - srv_tmp_space.normalize_size(); + /* ------------ UNDO tablespaces files ---------------------*/ + if (!srv_undo_dir) + srv_undo_dir= const_cast(fil_path_to_mysql_datadir); - /* ------------ UNDO tablespaces files ---------------------*/ - if (!srv_undo_dir) { - srv_undo_dir = const_cast(fil_path_to_mysql_datadir); - } + if (strchr(srv_undo_dir, ';')) + { + sql_print_error("syntax error in innodb_undo_directory"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (strchr(srv_undo_dir, ';')) { - sql_print_error("syntax error in innodb_undo_directory"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (!srv_log_group_home_dir) + srv_log_group_home_dir= const_cast(fil_path_to_mysql_datadir); - /* -------------- All log files ---------------------------*/ + if (strchr(srv_log_group_home_dir, ';')) + { + sql_print_error("syntax error in innodb_log_group_home_dir"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - /* The default dir for log files is the datadir of MySQL */ + DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL); - if (!srv_log_group_home_dir) { - srv_log_group_home_dir - = const_cast(fil_path_to_mysql_datadir); - } + /* Check that interdependent parameters have sane values. */ + if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) + { + sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" + " cannot be set higher than" + " innodb_max_dirty_pages_pct.\n" + "InnoDB: Setting" + " innodb_max_dirty_pages_pct_lwm to %lf\n", + srv_max_buf_pool_modified_pct); + srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; + } - if (strchr(srv_log_group_home_dir, ';')) { - sql_print_error("syntax error in innodb_log_group_home_dir"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) + { + if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) + /* Avoid overflow. */ + srv_max_io_capacity= SRV_MAX_IO_CAPACITY_LIMIT; + else + /* The user has not set the value. We should set it based on + innodb_io_capacity. */ + srv_max_io_capacity= std::max(2 * srv_io_capacity, 2000UL); + } + else if (srv_max_io_capacity < srv_io_capacity) + { + sql_print_warning("InnoDB: innodb_io_capacity cannot be set higher than" + " innodb_io_capacity_max." + "Setting innodb_io_capacity=%lu", srv_max_io_capacity); + srv_io_capacity= srv_max_io_capacity; + } - DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL); + if (UNIV_PAGE_SIZE_DEF != srv_page_size) + { + sql_print_information("InnoDB: innodb_page_size=%lu", srv_page_size); + srv_max_undo_log_size= + std::max(srv_max_undo_log_size, + ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) << + srv_page_size_shift); + } - /* Check that interdependent parameters have sane values. */ - if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) { - sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" - " cannot be set higher than" - " innodb_max_dirty_pages_pct.\n" - "InnoDB: Setting" - " innodb_max_dirty_pages_pct_lwm to %lf\n", - srv_max_buf_pool_modified_pct); + if (innobase_open_files < 10) + innobase_open_files= (srv_file_per_table && tc_size > 300 && + tc_size < open_files_limit) + ? tc_size + : 300; - srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; - } + if (innobase_open_files > open_files_limit) + { + sql_print_warning("InnoDB: innodb_open_files %lu" + " should not be greater than the open_files_limit %lu", + innobase_open_files, open_files_limit); + if (innobase_open_files > tc_size) + innobase_open_files= tc_size; + } - if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) { + const size_t min_open_files_limit= srv_undo_tablespaces + + srv_sys_space.m_files.size() + srv_tmp_space.m_files.size() + 1; + if (min_open_files_limit > innobase_open_files) + { + sql_print_warning("InnoDB: innodb_open_files=%lu is not greater " + "than the number of system tablespace files, " + "temporary tablespace files, " + "innodb_undo_tablespaces=%lu; adjusting " + "to innodb_open_files=%zu", + innobase_open_files, srv_undo_tablespaces, + min_open_files_limit); + innobase_open_files= ulong(min_open_files_limit); + } - if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) { - /* Avoid overflow. */ - srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT; - } else { - /* The user has not set the value. We should - set it based on innodb_io_capacity. */ - srv_max_io_capacity = - ut_max(2 * srv_io_capacity, 2000UL); - } + srv_max_n_open_files= innobase_open_files; + srv_innodb_status = (ibool) innobase_create_status_file; - } else if (srv_max_io_capacity < srv_io_capacity) { - sql_print_warning("InnoDB: innodb_io_capacity" - " cannot be set higher than" - " innodb_io_capacity_max." - "Setting innodb_io_capacity=%lu", - srv_max_io_capacity); + srv_print_verbose_log= !mysqld_embedded; - srv_io_capacity = srv_max_io_capacity; - } + if (!ut_is_2pow(fts_sort_pll_degree)) + { + ulong n; + for (n= 1; n < fts_sort_pll_degree; n<<= 1) {} + fts_sort_pll_degree= n; + } - if (UNIV_PAGE_SIZE_DEF != srv_page_size) { - ib::info() << "innodb_page_size=" << srv_page_size; - - srv_max_undo_log_size = std::max( - srv_max_undo_log_size, - ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) - << srv_page_size_shift); - } - - srv_buf_pool_size = ulint(innobase_buffer_pool_size); - - if (innobase_open_files < 10) { - innobase_open_files = 300; - if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) { - innobase_open_files = tc_size; - } - } - - if (innobase_open_files > open_files_limit) { - ib::warn() << "innodb_open_files " << innobase_open_files - << " should not be greater" - << " than the open_files_limit " << open_files_limit; - if (innobase_open_files > tc_size) { - innobase_open_files = tc_size; - } - } - - ulint min_open_files_limit = srv_undo_tablespaces - + srv_sys_space.m_files.size() - + srv_tmp_space.m_files.size() + 1; - if (min_open_files_limit > innobase_open_files) { - sql_print_warning( - "InnoDB: innodb_open_files=%lu is not greater " - "than the number of system tablespace files, " - "temporary tablespace files, " - "innodb_undo_tablespaces=%u; adjusting " - "to innodb_open_files=%zu", - innobase_open_files, srv_undo_tablespaces, - min_open_files_limit); - innobase_open_files = (ulong) min_open_files_limit; - } - - srv_max_n_open_files = innobase_open_files; - srv_innodb_status = (ibool) innobase_create_status_file; - - srv_print_verbose_log = mysqld_embedded ? 0 : 1; - - /* Round up fts_sort_pll_degree to nearest power of 2 number */ - for (num_pll_degree = 1; - num_pll_degree < fts_sort_pll_degree; - num_pll_degree <<= 1) { - - /* No op */ - } - - fts_sort_pll_degree = num_pll_degree; - - /* Store the default charset-collation number of this MySQL - installation */ - - data_mysql_default_charset_coll = (ulint) default_charset_info->number; + /* Store the default charset-collation number of this installation */ + data_mysql_default_charset_coll = (ulint) default_charset_info->number; #if !defined _WIN32 && defined O_DIRECT - if (srv_use_atomic_writes && my_may_have_atomic_write) { - /* - Force O_DIRECT on Unixes (on Windows writes are always - unbuffered) - */ - switch (srv_file_flush_method) { - case SRV_O_DIRECT: - case SRV_O_DIRECT_NO_FSYNC: - break; - default: - srv_file_flush_method = SRV_O_DIRECT; - fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n"); - } - } + if (srv_use_atomic_writes && my_may_have_atomic_write) + { + /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered) */ + switch (srv_file_flush_method) { + case SRV_O_DIRECT: + case SRV_O_DIRECT_NO_FSYNC: + break; + default: + srv_file_flush_method= SRV_O_DIRECT; + fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n"); + } + } #endif #if defined __linux__ || defined _WIN32 - if (srv_flush_log_at_trx_commit == 2) { - /* Do not disable the file system cache if - innodb_flush_log_at_trx_commit=2. */ - log_sys.log_buffered = true; - } + if (srv_flush_log_at_trx_commit == 2) + /* Do not disable the file system cache if + innodb_flush_log_at_trx_commit=2. */ + log_sys.log_buffered= true; #endif #if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32 - /* Currently native AIO is supported only on windows and linux - and that also when the support is compiled in. In all other - cases, we ignore the setting of innodb_use_native_aio. */ - srv_use_native_aio = FALSE; + /* Currently native AIO is supported only on windows and linux + and that also when the support is compiled in. In all other + cases, we ignore the setting of innodb_use_native_aio. */ + srv_use_native_aio= FALSE; #endif #ifdef HAVE_URING - if (srv_use_native_aio && io_uring_may_be_unsafe) { - sql_print_warning("innodb_use_native_aio may cause " - "hangs with this kernel %s; see " - "https://jira.mariadb.org/browse/MDEV-26674", - io_uring_may_be_unsafe); - } + if (srv_use_native_aio && io_uring_may_be_unsafe) + sql_print_warning("innodb_use_native_aio may cause " + "hangs with this kernel %s; see " + "https://jira.mariadb.org/browse/MDEV-26674", + io_uring_may_be_unsafe); #endif #ifdef _WIN32 - switch (srv_file_flush_method) { - case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */: - srv_file_flush_method = SRV_ALL_O_DIRECT_FSYNC; - break; - case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */: - srv_file_flush_method = SRV_FSYNC; - break; - default: - ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC); - } + switch (srv_file_flush_method) { + case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */: + srv_file_flush_method= SRV_ALL_O_DIRECT_FSYNC; + break; + case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */: + srv_file_flush_method= SRV_FSYNC; + break; + default: + ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC); + } #else - ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); + ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); #endif - innodb_buffer_pool_size_init(); - - srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift); - DBUG_RETURN(0); + srv_lock_table_size = 5 * buf_pool.curr_size(); + DBUG_RETURN(0); } /** Initialize the InnoDB storage engine plugin. @@ -17614,22 +17536,6 @@ innodb_stopword_table_validate( return(ret); } -extern void buf_resize_start(); - -/** Update the system variable innodb_buffer_pool_size using the "saved" -value. This function is registered as a callback with MySQL. -@param[in] save immediate result from check function */ -static -void -innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save) -{ - snprintf(export_vars.innodb_buffer_pool_resize_status, - sizeof(export_vars.innodb_buffer_pool_resize_status), - "Buffer pool resize requested"); - - buf_resize_start(); -} - /** The latest assigned innodb_ft_aux_table name */ static char* innodb_ft_aux_table; @@ -19295,11 +19201,12 @@ static MYSQL_SYSVAR_UINT(autoextend_increment, "Data file autoextend increment in megabytes", NULL, NULL, 64, 1, 1000, 0); -static MYSQL_SYSVAR_SIZE_T(buffer_pool_chunk_size, srv_buf_pool_chunk_unit, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Size of a single memory chunk" - " for resizing buffer pool. Online buffer pool resizing happens at this" - " granularity. 0 means autosize this variable based on buffer pool size.", +static size_t innodb_buffer_pool_chunk_size; + +static MYSQL_SYSVAR_SIZE_T(buffer_pool_chunk_size, + innodb_buffer_pool_chunk_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_DEPRECATED, + "Deprecated parameter with no effect", NULL, NULL, 0, 0, SIZE_T_MAX, 1024 * 1024); @@ -20026,6 +19933,7 @@ static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tabl static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_size_max), MYSQL_SYSVAR(buffer_pool_chunk_size), MYSQL_SYSVAR(buffer_pool_filename), MYSQL_SYSVAR(buffer_pool_dump_now), @@ -21130,90 +21038,6 @@ innobase_convert_to_system_charset( cs2, to, static_cast(len), errors))); } -/** Validate the requested buffer pool size. Also, reserve the necessary -memory needed for buffer pool resize. -@param[in] thd thread handle -@param[out] save immediate result for update function -@param[in] value incoming string -@return 0 on success, 1 on failure. -*/ -static -int -innodb_buffer_pool_size_validate( - THD* thd, - st_mysql_sys_var*, - void* save, - struct st_mysql_value* value) -{ - longlong intbuf; - - value->val_int(value, &intbuf); - - if (static_cast(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_buffer_pool_size must be at least" - " %lld for innodb_page_size=%lu", - MYSQL_SYSVAR_NAME(buffer_pool_size).min_val, - srv_page_size); - return(1); - } - - if (!srv_was_started) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Cannot update innodb_buffer_pool_size," - " because InnoDB is not started."); - return(1); - } - - mysql_mutex_lock(&buf_pool.mutex); - - if (srv_buf_pool_old_size != srv_buf_pool_size) { - mysql_mutex_unlock(&buf_pool.mutex); - my_printf_error(ER_WRONG_ARGUMENTS, - "Another buffer pool resize is already in progress.", MYF(0)); - return(1); - } - - ulint requested_buf_pool_size = buf_pool_size_align(ulint(intbuf)); - - *static_cast(save) = requested_buf_pool_size; - - if (srv_buf_pool_size == ulint(intbuf)) { - mysql_mutex_unlock(&buf_pool.mutex); - /* nothing to do */ - return(0); - } - - if (srv_buf_pool_size == requested_buf_pool_size) { - mysql_mutex_unlock(&buf_pool.mutex); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_buffer_pool_size must be at least" - " innodb_buffer_pool_chunk_size=%zu", - srv_buf_pool_chunk_unit); - /* nothing to do */ - return(0); - } - - srv_buf_pool_size = requested_buf_pool_size; - mysql_mutex_unlock(&buf_pool.mutex); - - if (intbuf != static_cast(requested_buf_pool_size)) { - char buf[64]; - int len = 64; - value->val_str(value, buf, &len); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_TRUNCATED_WRONG_VALUE, - "Truncated incorrect %-.32s value: '%-.128s'", - mysql_sysvar_buffer_pool_size.name, - value->val_str(value, buf, &len)); - } - - return(0); -} - /*************************************************************//** Check for a valid value of innobase_compression_algorithm. @return 0 for valid innodb_compression_algorithm. */ @@ -21509,19 +21333,3 @@ void ins_node_t::vers_update_end(row_prebuilt_t *prebuilt, bool history_row) if (UNIV_LIKELY_NULL(local_heap)) mem_heap_free(local_heap); } - -/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, -if needed. -@param[in] size size in bytes -@return aligned size */ -ulint buf_pool_size_align(ulint size) noexcept -{ - const size_t m = srv_buf_pool_chunk_unit; - size = ut_max(size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val); - - if (size % m == 0) { - return(size); - } else { - return (size / m + 1) * m; - } -} diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 0554a229e5e..96f848de033 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -3388,7 +3388,7 @@ static int i_s_innodb_stats_fill(THD *thd, TABLE_LIST * tables, Item *) DBUG_RETURN(0); } - buf_stats_get_pool_info(&info); + buf_pool.get_info(&info); table = tables->table; @@ -3937,87 +3937,37 @@ and fetch information to information schema tables: INNODB_BUFFER_PAGE. @return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_fill(THD *thd, TABLE_LIST *tables, Item *) { - int status = 0; - mem_heap_t* heap; + DBUG_ENTER("i_s_innodb_buffer_page_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); - DBUG_ENTER("i_s_innodb_buffer_page_fill"); + /* deny access to user without PROCESS privilege */ + if (check_global_access(thd, PROCESS_ACL)) + DBUG_RETURN(0); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); + int status; + buf_page_info_t *b= + static_cast(my_malloc(PSI_INSTRUMENT_ME, + MAX_BUF_INFO_CACHED * sizeof *b, + MYF(MY_WME))); + if (!b) + DBUG_RETURN(1); + for (size_t j= 0;;) + { + memset((void*) b, 0, MAX_BUF_INFO_CACHED * sizeof *b); + mysql_mutex_lock(&buf_pool.mutex); + const size_t N= buf_pool.curr_size(); + const size_t n= std::min(N, MAX_BUF_INFO_CACHED); + for (size_t i= 0; i < n && j < N; i++, j++) + i_s_innodb_buffer_page_get_info(&buf_pool.get_nth_page(j)->page, j, + &b[i]); - /* deny access to user without PROCESS privilege */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(10000); - - for (ulint n = 0; - n < ut_min(buf_pool.n_chunks, buf_pool.n_chunks_new); n++) { - const buf_block_t* block; - ulint n_blocks; - buf_page_info_t* info_buffer; - ulint num_page; - ulint mem_size; - ulint chunk_size; - ulint num_to_process = 0; - ulint block_id = 0; - - /* Get buffer block of the nth chunk */ - block = buf_pool.chunks[n].blocks; - chunk_size = buf_pool.chunks[n].size; - num_page = 0; - - while (chunk_size > 0) { - /* we cache maximum MAX_BUF_INFO_CACHED number of - buffer page info */ - num_to_process = ut_min(chunk_size, - (ulint)MAX_BUF_INFO_CACHED); - - mem_size = num_to_process * sizeof(buf_page_info_t); - - /* For each chunk, we'll pre-allocate information - structures to cache the page information read from - the buffer pool. Doing so before obtain any mutex */ - info_buffer = (buf_page_info_t*) mem_heap_zalloc( - heap, mem_size); - - /* Obtain appropriate mutexes. Since this is diagnostic - buffer pool info printout, we are not required to - preserve the overall consistency, so we can - release mutex periodically */ - mysql_mutex_lock(&buf_pool.mutex); - - /* GO through each block in the chunk */ - for (n_blocks = num_to_process; n_blocks--; block++) { - i_s_innodb_buffer_page_get_info( - &block->page, block_id, - info_buffer + num_page); - block_id++; - num_page++; - } - - mysql_mutex_unlock(&buf_pool.mutex); - - /* Fill in information schema table with information - just collected from the buffer chunk scan */ - status = i_s_innodb_buffer_page_fill( - thd, tables, info_buffer, - num_page); - - /* If something goes wrong, break and return */ - if (status) { - break; - } - - mem_heap_empty(heap); - chunk_size -= num_to_process; - num_page = 0; - } - } - - mem_heap_free(heap); - - DBUG_RETURN(status); + mysql_mutex_unlock(&buf_pool.mutex); + status= i_s_innodb_buffer_page_fill(thd, tables, b, n); + if (status || j >= N) + break; + } + my_free(b); + DBUG_RETURN(status); } /*******************************************************************//** diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index b594b86d1ea..07c8a13a72c 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -500,9 +500,7 @@ ibuf_max_size_update( { if (UNIV_UNLIKELY(!ibuf.index)) return; ulint new_size = std::min( - (buf_pool_get_curr_size() >> srv_page_size_shift) * new_val - / 100, uint32_t(~0U)); - + buf_pool.curr_size() * new_val / 100, uint32_t(~0U)); mysql_mutex_lock(&ibuf_mutex); ibuf.max_size = uint32_t(new_size); mysql_mutex_unlock(&ibuf_mutex); @@ -2056,8 +2054,7 @@ corruption: } } - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, - buf_pool_get_curr_size() / 4); + limit = std::min(IBUF_MAX_N_PAGES_MERGED, buf_pool.curr_size() / 4); first_page_no = ibuf_rec_get_page_no(mtr, rec); first_space_id = ibuf_rec_get_space(mtr, rec); diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index b75cad10180..6d084e8c651 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -39,12 +39,16 @@ extern mysql_pfs_key_t btr_search_latch_key; #define btr_search_sys_create() btr_search_sys.create() #define btr_search_sys_free() btr_search_sys.free() -/** Disable the adaptive hash search system and empty the index. */ -void btr_search_disable(); +/** Lazily free detached metadata when removing the last reference. */ +ATTRIBUTE_COLD void btr_search_lazy_free(dict_index_t *index); + +/** Disable the adaptive hash search system and empty the index. +@return whether the adaptive hash index was enabled */ +ATTRIBUTE_COLD bool btr_search_disable(); /** Enable the adaptive hash search system. @param resize whether buf_pool_t::resize() is the caller */ -void btr_search_enable(bool resize= false); +ATTRIBUTE_COLD void btr_search_enable(bool resize= false); /*********************************************************************//** Updates the search info. */ diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h index bb9994203d6..9ac26c7d4be 100644 --- a/storage/innobase/include/buf0buddy.h +++ b/storage/innobase/include/buf0buddy.h @@ -24,17 +24,13 @@ Binary buddy allocator for compressed pages Created December 2006 by Marko Makela *******************************************************/ -#ifndef buf0buddy_h -#define buf0buddy_h - +#pragma once #include "buf0types.h" /** @param[in] block size in bytes @return index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -inline -ulint -buf_buddy_get_slot(ulint size) +inline ulint buf_buddy_get_slot(ulint size) noexcept { ulint i; ulint s; @@ -53,13 +49,13 @@ buf_buddy_get_slot(ulint size) @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -byte *buf_buddy_alloc_low(ulint i, bool *lru) MY_ATTRIBUTE((malloc)); +byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept MY_ATTRIBUTE((malloc)); /** Allocate a ROW_FORMAT=COMPRESSED block. @param size compressed page size in bytes @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) +inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) noexcept { return buf_buddy_alloc_low(buf_buddy_get_slot(size), lru); } @@ -68,24 +64,26 @@ inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -void buf_buddy_free_low(void* buf, ulint i); +void buf_buddy_free_low(void* buf, ulint i) noexcept; /** Deallocate a block. @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] size block size in bytes */ -inline void buf_buddy_free(void* buf, ulint size) +inline void buf_buddy_free(void* buf, ulint size) noexcept { - buf_buddy_free_low(buf, buf_buddy_get_slot(size)); + buf_buddy_free_low(buf, buf_buddy_get_slot(size)); } -/** Try to reallocate a block. -@param[in] buf block to be reallocated, must be pointed -to by the buffer pool -@param[in] size block size, up to srv_page_size -@retval false if failed because of no free blocks. */ -bool buf_buddy_realloc(void* buf, ulint size); +ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::resize(). +@param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page +@param block uncompressed block for storage +@return block +@retval nullptr if the block was consumed */ +ATTRIBUTE_COLD +buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept; -/** Combine all pairs of free buddies. */ -void buf_buddy_condense_free(); -#endif /* buf0buddy_h */ +/** Combine all pairs of free buddies. +@param size the target innodb_buffer_pool_size */ +ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept; diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index bb3846038e2..0a3f04c9844 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -35,13 +35,16 @@ Created 11/5/1995 Heikki Tuuri #include "assume_aligned.h" #include "buf0types.h" #ifndef UNIV_INNOCHECKSUM -#include "ut0byte.h" #include "page0types.h" #include "log0log.h" #include "srv0srv.h" #include "transactional_lock_guard.h" #include +/** The allocation granularity of innodb_buffer_pool_size */ +constexpr size_t innodb_buffer_pool_extent_size= + sizeof(size_t) < 8 ? 2 << 20 : 8 << 20; + /** @name Modes for buf_page_get_gen */ /* @{ */ #define BUF_GET 10 /*!< get always */ @@ -71,7 +74,7 @@ struct buf_pool_info_t ulint pool_size; /*!< Buffer Pool size in pages */ ulint lru_len; /*!< Length of buf_pool.LRU */ ulint old_lru_len; /*!< buf_pool.LRU_old_len */ - ulint free_list_len; /*!< Length of buf_pool.free list */ + ulint free_list_len; /*!< free + lazy_allocate_size() */ ulint flush_list_len; /*!< Length of buf_pool.flush_list */ ulint n_pend_unzip; /*!< buf_pool.n_pend_unzip, pages pending decompress */ @@ -142,10 +145,8 @@ operator<<( const page_id_t page_id); #ifndef UNIV_INNOCHECKSUM -# define buf_pool_get_curr_size() srv_buf_pool_curr_size # define buf_block_free(block) buf_pool.free_block(block) - -#define buf_page_get(ID, SIZE, LA, MTR) \ +# define buf_page_get(ID, SIZE, LA, MTR) \ buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR) /** Try to buffer-fix a page. @@ -395,9 +396,6 @@ void buf_print_io( /*=========*/ FILE* file); /*!< in: file where to print */ -/** Collect buffer pool metadata. -@param[out] pool_info buffer pool metadata */ -void buf_stats_get_pool_info(buf_pool_info_t *pool_info) noexcept; /** Refresh the statistics used to print per-second averages. */ void buf_refresh_io_stats() noexcept; @@ -427,12 +425,6 @@ counter value in MONITOR_MODULE_BUF_PAGE. ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read) noexcept; -/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, -if needed. -@param[in] size size in bytes -@return aligned size */ -ulint buf_pool_size_align(ulint size) noexcept; - /** Verify that post encryption checksum match with the calculated checksum. This function should be called only if tablespace contains crypt data metadata. @param page page frame @@ -549,7 +541,7 @@ public: /** buf_pool.LRU status mask in state() */ static constexpr uint32_t LRU_MASK= 7U << 29; - /** lock covering the contents of frame */ + /** lock covering the contents of frame() */ block_lock lock; /** pointer to aligned, uncompressed page frame of innodb_page_size */ byte *frame; @@ -559,8 +551,6 @@ public: !frame && !zip.data means an active buf_pool.watch */ page_zip_des_t zip; #ifdef UNIV_DEBUG - /** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */ - bool in_zip_hash; /** whether this->LRU is in buf_pool.LRU (in_file()); protected by buf_pool.mutex */ bool in_LRU_list; @@ -574,7 +564,7 @@ public: /** list member in one of the lists of buf_pool; protected by buf_pool.mutex or buf_pool.flush_list_mutex - state() == NOT_USED: buf_pool.free or buf_pool.withdraw + state() == NOT_USED: buf_pool.free in_file() && oldest_modification(): buf_pool.flush_list (protected by buf_pool.flush_list_mutex) @@ -615,7 +605,7 @@ public: lock() /* not copied */, frame(b.frame), zip(b.zip), #ifdef UNIV_DEBUG - in_zip_hash(b.in_zip_hash), in_LRU_list(b.in_LRU_list), + in_LRU_list(b.in_LRU_list), in_page_hash(b.in_page_hash), in_free_list(b.in_free_list), #endif /* UNIV_DEBUG */ list(b.list), LRU(b.LRU), old(b.old), freed_page_clock(b.freed_page_clock), @@ -632,7 +622,6 @@ public: id_= id; zip.fix= state; oldest_modification_= 0; - ut_d(in_zip_hash= false); ut_d(in_free_list= false); ut_d(in_LRU_list= false); ut_d(in_page_hash= false); @@ -891,10 +880,6 @@ struct buf_block_t{ buf_pool.page_hash can point to buf_page_t or buf_block_t */ #ifdef UNIV_DEBUG - /** whether page.list is in buf_pool.withdraw - ((state() == NOT_USED)) and the buffer pool is being shrunk; - protected by buf_pool.mutex */ - bool in_withdraw_list; /** whether unzip_LRU is in buf_pool.unzip_LRU (in_file() && frame && zip.data); protected by buf_pool.mutex */ @@ -1022,15 +1007,10 @@ struct buf_block_t{ @param state initial state() */ void initialise(const page_id_t page_id, ulint zip_size, uint32_t state) noexcept; -}; -/**********************************************************************//** -Compute the hash fold value for blocks in buf_pool.zip_hash. */ -/* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift) -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->page.frame) -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) -/* @} */ + /** Calculate the page frame address */ + IF_DBUG(,inline) byte *frame_address() const noexcept; +}; /** A "Hazard Pointer" class used to iterate over buf_pool.LRU or buf_pool.flush_list. A hazard pointer is a buf_page_t pointer @@ -1198,59 +1178,62 @@ struct buf_buddy_stat_t { /** The buffer pool */ class buf_pool_t { - /** A chunk of buffers */ - struct chunk_t - { - /** number of elements in blocks[] */ - size_t size; - /** memory allocated for the page frames */ - unsigned char *mem; - /** descriptor of mem */ - ut_new_pfx_t mem_pfx; - /** array of buffer control blocks */ - buf_block_t *blocks; + /** arrays of buf_block_t followed by page frames; + aliged to and repeating every innodb_buffer_pool_extent_size; + each extent comprises pages_in_extent[] blocks */ + alignas(CPU_LEVEL1_DCACHE_LINESIZE) char *memory; + /** the allocation of the above memory, possibly including some + alignment loss at the beginning */ + char *memory_unaligned; + /** the virtual address range size of memory_unaligned */ + size_t size_unaligned; +#ifdef UNIV_PFS_MEMORY + /** the "owner thread" of the buffer pool allocation */ + PSI_thread *owner; +#endif + /** initialized number of block descriptors */ + size_t n_blocks; + /** number of blocks that need to be freed in shrink() */ + size_t n_blocks_to_withdraw; + /** first block to withdraw in shrink() */ + const buf_page_t *first_to_withdraw; - /** Map of first page frame address to chunks[] */ - using map= std::map, - ut_allocator>>; - /** Chunk map that may be under construction by buf_resize_thread() */ - static map *map_reg; - /** Current chunk map for lookup only */ - static map *map_ref; + /** amount of memory allocated to the buffer pool and descriptors; + protected by mutex */ + Atomic_relaxed size_in_bytes; - /** @return the memory size bytes. */ - size_t mem_size() const noexcept { return mem_pfx.m_size; } - - /** Register the chunk */ - void reg() noexcept - { map_reg->emplace(map::value_type(blocks->page.frame, this)); } - - /** Allocate a chunk of buffer frames. - @param bytes requested size - @return whether the allocation succeeded */ - inline bool create(size_t bytes) noexcept; - -#ifdef UNIV_DEBUG - /** Find a block that points to a ROW_FORMAT=COMPRESSED page - @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame - @return the block - @retval nullptr if not found */ - const buf_block_t *contains_zip(const void *data) const noexcept - { - const buf_block_t *block= blocks; - for (auto i= size; i--; block++) - if (block->page.zip.data == data) - return block; - return nullptr; - } - - /** Check that all blocks are in a replaceable state. - @return address of a non-free block - @retval nullptr if all freed */ - inline const buf_block_t *not_freed() const noexcept; -#endif /* UNIV_DEBUG */ - }; public: + /** The requested innodb_buffer_pool_size */ + size_t size_in_bytes_requested; + /** The maximum allowed innodb_buffer_pool_size */ + size_t size_in_bytes_max; + + /** @return the current size of the buffer pool, in bytes */ + size_t curr_pool_size() const noexcept { return size_in_bytes; } + + /** @return the current size of the buffer pool, in pages */ + TPOOL_SUPPRESS_TSAN size_t curr_size() const noexcept { return n_blocks; } + /** @return the maximum usable size of the buffer pool, in pages */ + TPOOL_SUPPRESS_TSAN size_t usable_size() const noexcept + { return n_blocks - n_blocks_to_withdraw - UT_LIST_GET_LEN(withdrawn); } + + /** Determine the used size of the buffer pool in bytes. + @param n_blocks size of the buffer pool in blocks + @return the size needed for n_blocks in bytes, for innodb_page_size */ + static size_t blocks_in_bytes(size_t n_blocks) noexcept; + +#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) + /** Enable buffers to be dumped to core files. + + A convenience function, not called anyhwere directly however + it is left available for gdb or any debugger to call + in the event that you want all of the memory to be dumped + to a core file. + + @return number of errors found in madvise() calls */ + static int madvise_do_dump() noexcept; +#endif + /** Hash cell chain in page_hash_table */ struct hash_chain { @@ -1258,106 +1241,58 @@ public: buf_page_t *first; }; private: - /** Withdraw blocks from the buffer pool until meeting withdraw_target. - @return whether retry is needed */ - inline bool withdraw_blocks() noexcept; + /** Determine the number of blocks in a buffer pool of a particular size. + @param size_in_bytes innodb_buffer_pool_size in bytes + @return number of buffer pool pages */ + static size_t get_n_blocks(size_t size_in_bytes) noexcept; - /** Determine if a pointer belongs to a buf_block_t. It can be a pointer to - the buf_block_t itself or a member of it. - @param ptr a pointer that will not be dereferenced - @return whether the ptr belongs to a buf_block_t struct */ - bool is_block_field(const void *ptr) const noexcept - { - const chunk_t *chunk= chunks; - const chunk_t *const echunk= chunk + ut_min(n_chunks, n_chunks_new); + /** The outcome of shrink() */ + enum shrink_status{SHRINK_DONE= -1, SHRINK_IN_PROGRESS= 0, SHRINK_ABORT}; - /* TODO: protect chunks with a mutex (the older pointer will - currently remain during resize()) */ - for (; chunk < echunk; chunk++) - if (ptr >= reinterpret_cast(chunk->blocks) && - ptr < reinterpret_cast(chunk->blocks + chunk->size)) - return true; - return false; - } + /** Attempt to shrink the buffer pool. + @param size requested innodb_buffer_pool_size in bytes + @retval whether the shrinking was completed */ + ATTRIBUTE_COLD shrink_status shrink(size_t size) noexcept; - /** Try to reallocate a control block. - @param block control block to reallocate - @return whether the reallocation succeeded */ - inline bool realloc(buf_block_t *block) noexcept; + /** Finish shrinking the buffer pool. + @param size the new innodb_buffer_pool_size in bytes + @param reduced how much the innodb_buffer_pool_size was reduced */ + inline void shrunk(size_t size, size_t reduced) noexcept; public: - bool is_initialised() const noexcept { return chunks != nullptr; } + bool is_initialised() const noexcept { return memory != nullptr; } /** Create the buffer pool. @return whether the creation failed */ - bool create(); + bool create() noexcept; /** Clean up after successful create() */ void close() noexcept; - /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */ - inline void resize(); + /** Resize the buffer pool. + @param size requested innodb_buffer_pool_size in bytes + @param thd current connnection */ + ATTRIBUTE_COLD void resize(size_t size, THD *thd) noexcept; -#ifdef __linux__ /** Collect garbage (release pages from the LRU list) */ - inline void garbage_collect(); -#endif + inline void garbage_collect() noexcept; - /** @return whether resize() is in progress */ - bool resize_in_progress() const noexcept - { - return UNIV_UNLIKELY(resizing.load(std::memory_order_relaxed)); - } - - /** @return the current size in blocks */ - size_t get_n_pages() const noexcept - { - ut_ad(is_initialised()); - size_t size= 0; - for (auto j= ut_min(n_chunks_new, n_chunks); j--; ) - size+= chunks[j].size; - return size; - } - - /** Determine whether a frame is intended to be withdrawn during resize(). + /** Determine whether a frame needs to be withdrawn during resize(). @param ptr pointer within a buf_page_t::frame + @param size size_in_bytes_requested @return whether the frame will be withdrawn */ - bool will_be_withdrawn(const byte *ptr) const noexcept + bool will_be_withdrawn(const byte *ptr, size_t size) const noexcept { - ut_ad(n_chunks_new < n_chunks); -#ifdef SAFE_MUTEX - if (resize_in_progress()) - mysql_mutex_assert_owner(&mutex); -#endif /* SAFE_MUTEX */ - - for (const chunk_t *chunk= chunks + n_chunks_new, - * const echunk= chunks + n_chunks; - chunk != echunk; chunk++) - if (ptr >= chunk->blocks->page.frame && - ptr < (chunk->blocks + chunk->size - 1)->page.frame + srv_page_size) - return true; - return false; + const char *p= reinterpret_cast(ptr); + ut_ad(p >= memory); + ut_ad(p < memory + size_in_bytes_max); + return p >= memory + size; } - /** Determine whether a block is intended to be withdrawn during resize(). + /** Withdraw a block if needed in case resize() is shrinking. @param bpage buffer pool block - @return whether the frame will be withdrawn */ - bool will_be_withdrawn(const buf_page_t &bpage) const noexcept - { - ut_ad(n_chunks_new < n_chunks); -#ifdef SAFE_MUTEX - if (resize_in_progress()) - mysql_mutex_assert_owner(&mutex); -#endif /* SAFE_MUTEX */ - - for (const chunk_t *chunk= chunks + n_chunks_new, - * const echunk= chunks + n_chunks; - chunk != echunk; chunk++) - if (&bpage >= &chunk->blocks->page && - &bpage < &chunk->blocks[chunk->size].page) - return true; - return false; - } + @return whether the block was withdrawn */ + ATTRIBUTE_COLD bool withdraw(buf_page_t &bpage) noexcept; /** Release and evict a corrupted page. @param bpage x-latched page that was found corrupted @@ -1371,31 +1306,18 @@ public: #ifdef UNIV_DEBUG /** Find a block that points to a ROW_FORMAT=COMPRESSED page @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame + @param shift number of least significant address bits to ignore @return the block @retval nullptr if not found */ - const buf_block_t *contains_zip(const void *data) const noexcept - { - mysql_mutex_assert_owner(&mutex); - for (const chunk_t *chunk= chunks, * const end= chunks + n_chunks; - chunk != end; chunk++) - if (const buf_block_t *block= chunk->contains_zip(data)) - return block; - return nullptr; - } - + const buf_block_t *contains_zip(const void *data, size_t shift= 0) + const noexcept; /** Assert that all buffer pool pages are in a replaceable state */ void assert_all_freed() noexcept; #endif /* UNIV_DEBUG */ #ifdef BTR_CUR_HASH_ADAPT /** Clear the adaptive hash index on all pages in the buffer pool. */ - inline void clear_hash_index() noexcept; - - /** Get a buffer block from an adaptive hash index pointer. - This function does not return if the block is not identified. - @param ptr pointer to within a page frame - @return pointer to block, never NULL */ - inline buf_block_t *block_from_ahi(const byte *ptr) const noexcept; + void clear_hash_index() noexcept; #endif /* BTR_CUR_HASH_ADAPT */ /** @@ -1418,13 +1340,27 @@ public: return empty_lsn; } - /** Determine if a buffer block was created by chunk_t::create(). - @param block block descriptor (not dereferenced) - @return whether block has been created by chunk_t::create() */ - bool is_uncompressed(const buf_block_t *block) const noexcept + /** Look up the block descriptor for a page frame address. + @param ptr address within a valid page frame + @return the corresponding block descriptor */ + static buf_block_t *block_from(const void *ptr) noexcept; + + /** Access a block while holding the buffer pool mutex. + @param pos position between 0 and get_n_pages() + @return the block descriptor */ + buf_block_t *get_nth_page(size_t pos) const noexcept; + +#ifdef UNIV_DEBUG + /** Determine if an object is within the curr_pool_size() + and associated with an uncompressed page. + @param ptr memory object (not dereferenced) + @return whether the object is valid in the current buffer pool */ + bool is_uncompressed_current(const void *ptr) const noexcept { - return is_block_field(reinterpret_cast(block)); + const ptrdiff_t d= static_cast(ptr) - memory; + return d >= 0 && size_t(d) < curr_pool_size(); } +#endif public: /** page_fix() mode of operation */ @@ -1456,6 +1392,16 @@ public: buf_block_t *page_fix(const page_id_t id) noexcept { return page_fix(id, nullptr, FIX_WAIT_READ); } + /** Validate a block descriptor. + @param b block descriptor that may be invalid after shrink() + @param latch page_hash latch for id + @param id page identifier + @return b->page.fix() if b->page.id() == id + @retval 0 if b is invalid */ + TRANSACTIONAL_TARGET + uint32_t page_guess(buf_block_t *b, page_hash_latch &latch, + const page_id_t id) noexcept; + /** Decompress a page and relocate the block descriptor @param b buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page @param chain hash table chain for b->id().fold() @@ -1477,7 +1423,6 @@ public: buf_page_t *bpage= page_hash.get(page_id, chain); if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)]) { - ut_ad(!bpage->in_zip_hash); ut_ad(!bpage->zip.data); if (!allow_watch) bpage= nullptr; @@ -1498,7 +1443,6 @@ public: ut_ad(bpage.in_file()); if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)]) return false; - ut_ad(!bpage.in_zip_hash); ut_ad(!bpage.zip.data); return true; } @@ -1539,23 +1483,30 @@ public: inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain) noexcept; /** @return whether less than 1/4 of the buffer pool is available */ - TPOOL_SUPPRESS_TSAN - bool running_out() const noexcept - { - return !recv_recovery_is_on() && - UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < - (n_chunks_new * chunks->size) / 4; - } + bool running_out() const noexcept; /** @return whether the buffer pool is running low */ bool need_LRU_eviction() const noexcept; - /** @return whether the buffer pool is shrinking */ - inline bool is_shrinking() const noexcept + /** @return number of blocks resize() needs to evict from the buffer pool */ + size_t is_shrinking() const noexcept { - return n_chunks_new < n_chunks; + mysql_mutex_assert_owner(&mutex); + return n_blocks_to_withdraw + UT_LIST_GET_LEN(withdrawn); } + /** @return number of blocks in resize() waiting to be withdrawn */ + size_t to_withdraw() const noexcept + { + mysql_mutex_assert_owner(&mutex); + return n_blocks_to_withdraw; + } + + /** @return the shrinking size of the buffer pool, in bytes + @retval 0 if resize() is not shrinking the buffer pool */ + size_t shrinking_size() const noexcept + { return is_shrinking() ? size_in_bytes_requested : 0; } + #ifdef UNIV_DEBUG /** Validate the buffer pool. */ void validate() noexcept; @@ -1572,7 +1523,6 @@ public: mysql_mutex_assert_owner(&mutex); ut_ad(bpage->in_LRU_list); ut_ad(bpage->in_page_hash); - ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_file()); lru_hp.adjust(bpage); lru_scan_itr.adjust(bpage); @@ -1592,26 +1542,8 @@ public: /** @name General fields */ /* @{ */ - ulint curr_pool_size; /*!< Current pool size in bytes */ ulint LRU_old_ratio; /*!< Reserve this much of the buffer pool for "old" blocks */ -#ifdef UNIV_DEBUG - ulint buddy_n_frames; /*!< Number of frames allocated from - the buffer pool to the buddy system */ - ulint mutex_exit_forbidden; /*!< Forbid release mutex */ -#endif - ut_allocator allocator; /*!< Allocator used for - allocating memory for the the "chunks" - member. */ - ulint n_chunks; /*!< number of buffer pool chunks */ - ulint n_chunks_new; /*!< new number of buffer pool chunks. - both n_chunks{,new} are protected under - mutex */ - chunk_t* chunks; /*!< buffer pool chunks */ - chunk_t* chunks_old; /*!< old buffer pool chunks to be freed - after resizing buffer pool */ - /** current pool size in pages */ - Atomic_counter curr_size; /** read-ahead request size in pages */ Atomic_counter read_ahead_area; @@ -1723,12 +1655,6 @@ public: /** Look up a page in a hash bucket chain. */ inline buf_page_t *get(const page_id_t id, const hash_chain &chain) const noexcept; - - /** Exclusively aqcuire all latches */ - inline void write_lock_all() noexcept; - - /** Release all latches */ - inline void write_unlock_all() noexcept; }; /** Buffer pool mutex */ @@ -1745,9 +1671,6 @@ public: indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */ page_hash_table page_hash; - /** map of block->frame to buf_block_t blocks that belong - to buf_buddy_alloc(); protected by buf_pool.mutex */ - hash_table_t zip_hash; /** number of pending unzip() */ Atomic_counter n_pend_unzip; @@ -1878,30 +1801,29 @@ public: Set whenever the free list grows, along with a broadcast of done_free. Protected by buf_pool.mutex. */ Atomic_relaxed try_LRU_scan; - /** Whether we have warned to be running out of buffer pool */ - std::atomic_flag LRU_warned; /* @} */ /** @name LRU replacement algorithm fields */ /* @{ */ - UT_LIST_BASE_NODE_T(buf_page_t) free; - /*!< base node of the free - block list */ +private: + /** Whether we have warned to be running out of buffer pool; + only modified by buf_flush_page_cleaner(): + set while holding mutex, cleared while holding flush_list_mutex */ + Atomic_relaxed LRU_warned; + + /** withdrawn blocks during resize() */ + UT_LIST_BASE_NODE_T(buf_page_t) withdrawn; + +public: + /** list of blocks available for allocate() */ + UT_LIST_BASE_NODE_T(buf_page_t) free; + /** broadcast each time when the free list grows or try_LRU_scan is set; protected by mutex */ pthread_cond_t done_free; - UT_LIST_BASE_NODE_T(buf_page_t) withdraw; - /*!< base node of the withdraw - block list. It is only used during - shrinking buffer pool size, not to - reuse the blocks will be removed */ - - ulint withdraw_target;/*!< target length of withdraw - block list, when withdrawing */ - /** "hazard pointer" used during scan of LRU while doing LRU list batch. Protected by buf_pool_t::mutex. */ LRUHp lru_hp; @@ -1942,10 +1864,22 @@ public: /** Sentinels to detect if pages are read into the buffer pool while a delete-buffering operation is pending. Protected by mutex. */ buf_page_t watch[innodb_purge_threads_MAX + 1]; + + /** Clear LRU_warned */ + void LRU_warned_clear() noexcept + { + mysql_mutex_assert_owner(&flush_list_mutex); + LRU_warned= false; + } + /** Reserve a buffer. */ buf_tmp_buffer_t *io_buf_reserve(bool wait_for_reads) noexcept { return io_buf.reserve(wait_for_reads); } + /** Try to allocate a block. + @return a buffer block + @retval nullptr if no blocks are available */ + buf_block_t *allocate() noexcept; /** Remove a block from flush_list. @param bpage buffer pool page */ void delete_from_flush_list(buf_page_t *bpage) noexcept; @@ -1968,6 +1902,10 @@ public: /** Issue a warning that we could not free up buffer pool pages. */ ATTRIBUTE_COLD void LRU_warn() noexcept; + /** Collect buffer pool metadata. + @param pool_info buffer pool metadata */ + void get_info(buf_pool_info_t *pool_info) noexcept; + private: /** Temporary memory for page_compressed and encrypted I/O */ struct io_buf_t @@ -1984,9 +1922,6 @@ private: /** Reserve a buffer */ buf_tmp_buffer_t *reserve(bool wait_for_reads) noexcept; } io_buf; - - /** whether resize() is in the critical path */ - std::atomic resizing; }; /** The InnoDB buffer pool */ @@ -2135,24 +2070,6 @@ inline void buf_page_t::set_old(bool old) noexcept this->old= old; } -#ifdef UNIV_DEBUG -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid() do { \ - mysql_mutex_assert_owner(&buf_pool.mutex); \ - buf_pool.mutex_exit_forbidden++; \ -} while (0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow() do { \ - mysql_mutex_assert_owner(&buf_pool.mutex); \ - ut_ad(buf_pool.mutex_exit_forbidden--); \ -} while (0) -#else -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid() ((void) 0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow() ((void) 0) -#endif - /********************************************************************** Let us list the consistency conditions for different control block states. diff --git a/storage/innobase/include/buf0buf.inl b/storage/innobase/include/buf0buf.inl index 8103ac2f7ef..c90eee27b62 100644 --- a/storage/innobase/include/buf0buf.inl +++ b/storage/innobase/include/buf0buf.inl @@ -37,7 +37,7 @@ inline bool buf_page_peek_if_young(const buf_page_t *bpage) /* FIXME: bpage->freed_page_clock is 31 bits */ return((buf_pool.freed_page_clock & ((1UL << 31) - 1)) < (bpage->freed_page_clock - + (buf_pool.curr_size + + (buf_pool.curr_size() * (BUF_LRU_OLD_RATIO_DIV - buf_pool.LRU_old_ratio) / (BUF_LRU_OLD_RATIO_DIV * 4)))); } diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index ac4f36066a4..29f41adde1c 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -55,10 +55,6 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) @return true if found and freed */ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED); -/** @return a buffer block from the buf_pool.free list -@retval NULL if the free list is empty */ -buf_block_t* buf_LRU_get_free_only(); - /** Get a block from the buf_pool.free list. If the list is empty, blocks will be moved from the end of buf_pool.LRU to buf_pool.free. diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index bf3778a33c5..a233a691541 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -223,17 +223,6 @@ extern uint srv_flush_log_at_timeout; extern my_bool srv_adaptive_flushing; extern my_bool srv_flush_sync; -/** Requested size in bytes */ -extern ulint srv_buf_pool_size; -/** Requested buffer pool chunk size */ -extern size_t srv_buf_pool_chunk_unit; -/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/ -/** Previously requested size */ -extern ulint srv_buf_pool_old_size; -/** Current size as scaling factor for the other components */ -extern ulint srv_buf_pool_base_size; -/** Current size in bytes */ -extern ulint srv_buf_pool_curr_size; /** Dump this % of each buffer pool during BP dump */ extern ulong srv_buf_pool_dump_pct; #ifdef UNIV_DEBUG @@ -596,7 +585,7 @@ struct export_var_t{ #endif /* BTR_CUR_HASH_ADAPT */ char innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */ char innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */ - char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */ + char innodb_buffer_pool_resize_status[65];/*!< Buf pool resize status */ my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 77225b1c809..268871cc5af 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1330,7 +1330,11 @@ void log_free_check() } } -extern void buf_resize_shutdown(); +#ifdef __linux__ +extern void buf_mem_pressure_shutdown() noexcept; +#else +inline void buf_mem_pressure_shutdown() noexcept {} +#endif /** Make a checkpoint at the latest lsn on shutdown. */ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() @@ -1348,8 +1352,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() srv_master_timer.reset(); } - /* Wait for the end of the buffer resize task.*/ - buf_resize_shutdown(); + buf_mem_pressure_shutdown(); dict_stats_shutdown(); btr_defragment_shutdown(); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 3c7b63bdfed..d238d46d23d 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1481,34 +1481,18 @@ inline void recv_sys_t::free(const void *data) ut_ad(!ut_align_offset(data, ALIGNMENT)); mysql_mutex_assert_owner(&mutex); - /* MDEV-14481 FIXME: To prevent race condition with buf_pool.resize(), - we must acquire and hold the buffer pool mutex here. */ - ut_ad(!buf_pool.resize_in_progress()); - - auto *chunk= buf_pool.chunks; - for (auto i= buf_pool.n_chunks; i--; chunk++) + buf_block_t *block= buf_pool.block_from(data); + ut_ad(block->page.frame == page_align(data)); + ut_ad(block->page.state() == buf_page_t::MEMORY); + ut_ad(uint16_t(block->page.free_offset - 1) < srv_page_size); + ut_ad(block->page.used_records); + if (!--block->page.used_records) { - if (data < chunk->blocks->page.frame) - continue; - const size_t offs= (reinterpret_cast(data) - - chunk->blocks->page.frame) >> srv_page_size_shift; - if (offs >= chunk->size) - continue; - buf_block_t *block= &chunk->blocks[offs]; - ut_ad(block->page.frame == page_align(data)); - ut_ad(block->page.state() == buf_page_t::MEMORY); - ut_ad(uint16_t(block->page.free_offset - 1) < srv_page_size); - ut_ad(block->page.used_records); - if (!--block->page.used_records) - { - block->page.hash= nullptr; - UT_LIST_REMOVE(blocks, block); - MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); - buf_block_free(block); - } - return; + block->page.hash= nullptr; + UT_LIST_REMOVE(blocks, block); + MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); + buf_block_free(block); } - ut_ad(0); } @@ -2060,9 +2044,9 @@ ATTRIBUTE_COLD void recv_sys_t::wait_for_pool(size_t pages) mysql_mutex_lock(&mutex); garbage_collect(); mysql_mutex_lock(&buf_pool.mutex); - bool need_more= UT_LIST_GET_LEN(buf_pool.free) < pages; + const size_t available= UT_LIST_GET_LEN(buf_pool.free); mysql_mutex_unlock(&buf_pool.mutex); - if (need_more) + if (available < pages) buf_flush_sync_batch(lsn); } diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index ab3d1a2f76a..bb632bd66b7 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -169,7 +169,7 @@ inline void buf_pool_t::insert_into_flush_list(buf_page_t *prev, else flush_list_bytes+= block->physical_size(); - ut_ad(flush_list_bytes <= curr_pool_size); + ut_ad(flush_list_bytes <= size_in_bytes); if (prev) UT_LIST_INSERT_AFTER(flush_list, prev, &block->page); diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 62229842ab7..a9bc310753b 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -1366,12 +1366,13 @@ srv_mon_process_existing_counter( /* innodb_buffer_pool_pages_total */ case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL: - value = buf_pool.get_n_pages(); + case MONITOR_OVLD_BUFFER_POOL_SIZE: + value = buf_pool.curr_size(); break; /* innodb_buffer_pool_pages_misc */ case MONITOR_OVLD_BUF_POOL_PAGE_MISC: - value = buf_pool.get_n_pages() + value = buf_pool.curr_size() - UT_LIST_GET_LEN(buf_pool.LRU) - UT_LIST_GET_LEN(buf_pool.free); break; @@ -1490,10 +1491,6 @@ srv_mon_process_existing_counter( value = srv_page_size; break; - case MONITOR_OVLD_BUFFER_POOL_SIZE: - value = srv_buf_pool_size; - break; - /* innodb_row_lock_current_waits */ case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT: // dirty read without lock_sys.wait_mutex diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index f73a95d5226..3dbe680a340 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -178,16 +178,6 @@ srv_printf_innodb_monitor() will request mutex acquisition with mysql_mutex_lock(), which will wait until it gets the mutex. */ #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) -/** copy of innodb_buffer_pool_size */ -ulint srv_buf_pool_size; -/** Requested buffer pool chunk size */ -size_t srv_buf_pool_chunk_unit; -/** Previously requested size */ -ulint srv_buf_pool_old_size; -/** Current size as scaling factor for the other components */ -ulint srv_buf_pool_base_size; -/** Current size in bytes */ -ulint srv_buf_pool_curr_size; /** Dump this % of each buffer pool during BP dump */ ulong srv_buf_pool_dump_pct; /** Abort load after this amount of pages */ @@ -901,6 +891,7 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_read_requests = buf_pool.stat.n_page_gets; + mysql_mutex_lock(&buf_pool.mutex); export_vars.innodb_buffer_pool_bytes_data = buf_pool.stat.LRU_bytes + (UT_LIST_GET_LEN(buf_pool.unzip_LRU) @@ -910,12 +901,21 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_pages_latched = buf_get_latched_pages_number(); #endif /* UNIV_DEBUG */ - export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages(); + export_vars.innodb_buffer_pool_pages_total = buf_pool.curr_size(); export_vars.innodb_buffer_pool_pages_misc = - buf_pool.get_n_pages() + export_vars.innodb_buffer_pool_pages_total - UT_LIST_GET_LEN(buf_pool.LRU) - UT_LIST_GET_LEN(buf_pool.free); + if (size_t shrinking = buf_pool.is_shrinking()) { + snprintf(export_vars.innodb_buffer_pool_resize_status, + sizeof export_vars.innodb_buffer_pool_resize_status, + "Withdrawing blocks. (%zu/%zu).", + buf_pool.to_withdraw(), shrinking); + } else { + export_vars.innodb_buffer_pool_resize_status[0] = '\0'; + } + mysql_mutex_unlock(&buf_pool.mutex); export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id(); export_vars.innodb_history_list_length = trx_sys.history_size_approx(); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index e32ed6e4710..dc2ad75848f 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1288,31 +1288,10 @@ dberr_t srv_start(bool create_new_db) fil_system.create(srv_file_per_table ? 50000 : 5000); - ib::info() << "Initializing buffer pool, total size = " - << ib::bytes_iec{srv_buf_pool_size} - << ", chunk size = " << ib::bytes_iec{srv_buf_pool_chunk_unit}; - if (buf_pool.create()) { - ib::error() << "Cannot allocate memory for the buffer pool"; - return(srv_init_abort(DB_ERROR)); } - ib::info() << "Completed initialization of buffer pool"; - -#ifdef UNIV_DEBUG - /* We have observed deadlocks with a 5MB buffer pool but - the actual lower limit could very well be a little higher. */ - - if (srv_buf_pool_size <= 5 * 1024 * 1024) { - - ib::info() << "Small buffer pool size (" - << ib::bytes_iec{srv_buf_pool_size} - << "), the flst_validate() debug function can cause a" - << " deadlock if the buffer pool fills up."; - } -#endif /* UNIV_DEBUG */ - log_sys.create(); recv_sys.create(); lock_sys.create(srv_lock_table_size); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index d6feb820a62..0b9b5e3951c 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -1237,9 +1237,6 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, static_cast(thd_mdl_context(thd)); ut_ad(mdl_context); - const size_t max_pages= - std::min(buf_pool.curr_size * 3 / 4, size_t{srv_purge_batch_size}); - while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) { /* Track the max {trx_id, undo_no} for truncating the @@ -1289,7 +1286,9 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, ut_ad(!table_node->in_progress); } - if (purge_sys.n_pages_handled() >= max_pages) + const size_t size{purge_sys.n_pages_handled()}; + if (size >= size_t{srv_purge_batch_size} || + size >= buf_pool.usable_size() * 3 / 4) break; }