1
0
mirror of https://github.com/MariaDB/server.git synced 2025-12-24 11:21:21 +03:00

MDEV-29694 Remove the InnoDB change buffer

The purpose of the change buffer was to reduce random disk access,
which could be useful on rotational storage, but maybe less so on
solid-state storage.
When we wished to
(1) insert a record into a non-unique secondary index,
(2) delete-mark a secondary index record,
(3) delete a secondary index record as part of purge (but not ROLLBACK),
and the B-tree leaf page to which the record belongs was not in the buffer
pool, we inserted a record into the change buffer B-tree, indexed by
the page identifier. When the page was eventually read into the buffer
pool, we looked up the change buffer B-tree for any modifications to the
page and applied them upon the completion of the read operation. This
was called the insert buffer merge.

We remove the change buffer, because it has been the source of
various hard-to-reproduce corruption bugs, including but not limited to
those fixed in commit 5b9ee8d819 and
commit 165564d3c3.

Starting with commit db14eb16f9 (MDEV-30106), a downgrade will fail
with a clear message.

buf_page_t::state: Merge IBUF_EXIST into UNFIXED and
WRITE_FIX_IBUF into WRITE_FIX.

buf_pool_t::watch[]: Remove.

trx_t: Move isolation_level, check_foreigns, check_unique_secondary,
bulk_insert into the same bit-field. The only purpose of
trx_t::check_unique_secondary is to enable bulk insert into an
empty table. It no longer enables insert buffering for UNIQUE INDEX.

btr_cur_t::thr: Remove. This field was originally needed for change
buffering. Later, its use was extended to cover SPATIAL INDEX.
Much of the time, rtr_info::thr holds this field. When it does not,
we will add parameters to SPATIAL INDEX specific functions.

ibuf_upgrade_needed(): Check if the change buffer needs to be upgraded.

ibuf_upgrade(): Merge and upgrade the change buffer after all redo log
has been applied. Free any pages consumed by the change buffer, and
zero out the change buffer root page to mark the upgrade completed,
and to prevent a downgrade to an earlier version.

dict_load_tablespaces(): Renamed from
dict_check_tablespaces_and_store_max_id(). This needs to be invoked
before ibuf_upgrade().

btr_cur_open_at_rnd_pos(): Specialize for use in persistent statistics.
The change buffer merge does not need this function anymore.

btr_page_alloc(): Renamed from btr_page_alloc_low(). We no longer
allocate any change buffer pages.

btr_cur_open_at_rnd_pos(): Specialize for use in persistent statistics.
The change buffer merge does not need this function anymore.

row_search_index_entry(), btr_lift_page_up(): Add a parameter thr
for the SPATIAL INDEX case.

rtr_page_split_and_insert(): Specialized from btr_page_split_and_insert().

rtr_root_raise_and_insert(): Specialized from btr_root_raise_and_insert().

Note: The support for upgrading from the MySQL 3.23 or MySQL 4.0
change buffer format, which predates the MySQL 4.1 introduction of
the option innodb_file_per_table, was removed in MySQL 5.6.5
as part of mysql/mysql-server@69b6241a79
and in MariaDB 10.0.11 as part of 1d0f70c2f8.

In the tests innodb.log_upgrade and innodb.log_corruption, we create
valid (upgraded) change buffer pages.

Tested by: Matthias Leich
This commit is contained in:
Marko Mäkelä
2023-01-11 17:59:36 +02:00
parent 24648768b4
commit f27e9c8947
122 changed files with 1655 additions and 9292 deletions

View File

@@ -2169,12 +2169,6 @@ static bool innodb_init_param()
srv_print_verbose_log = verbose ? 2 : 1;
/* Store the default charset-collation number of this MySQL
installation */
/* We cannot treat characterset here for now!! */
data_mysql_default_charset_coll = (ulint)default_charset_info->number;
ut_ad(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
#ifdef _WIN32

View File

@@ -1,4 +1,3 @@
--innodb-change-buffering=all
--innodb-encrypt-tables=on
--innodb-tablespaces-encryption
--innodb-encryption-threads=2

View File

@@ -1,2 +1 @@
--innodb-change-buffering=none
--innodb-default-encryption-key-id=20

View File

@@ -1,53 +0,0 @@
SET @buffering= @@innodb_change_buffering;
SET GLOBAL innodb_change_buffering= deletes;
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
SET @flush= @@innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit= 0;
CREATE TABLE t1 (
a varchar(1024),
b varchar(1024),
c varchar(1024),
d varchar(1024),
e varchar(1024),
f varchar(1024),
g varchar(1024),
h varchar(1024),
key (a),
key (b),
key (c),
key (d)
) ENGINE=InnoDB;
INSERT INTO t1
SELECT REPEAT('x',10), REPEAT('x',13), REPEAT('x',427), REPEAT('x',244),
REPEAT('x',9), REPEAT('x',112), REPEAT('x',814), REPEAT('x',633)
FROM seq_1_to_1024;
CREATE TEMPORARY TABLE t2 (
a varchar(1024),
b varchar(1024),
c varchar(1024),
d varchar(1024),
e varchar(1024),
f varchar(1024),
g varchar(1024),
h varchar(1024),
i varchar(1024),
j varchar(1024),
k varchar(1024),
l varchar(1024),
m varchar(1024),
key (a),
key (b),
key (c),
key (d),
key (e),
key (f)
) ENGINE=InnoDB;
SET @x=REPEAT('x',512);
INSERT INTO t2 SELECT @x, @x, @x, @x, @x, @x, @x, @x, @x, @x, @x, @x, @x
FROM seq_1_to_768;
DROP TABLE t1, t2;
SET GLOBAL innodb_change_buffering= @buffering;
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
SET GLOBAL innodb_flush_log_at_trx_commit= @flush;

View File

@@ -1,20 +0,0 @@
CREATE TABLE t1(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(1),
c INT,
INDEX(b))
ENGINE=InnoDB STATS_PERSISTENT=0;
SET GLOBAL innodb_change_buffering_debug = 1;
SET GLOBAL innodb_change_buffering=all;
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_1024;
# restart: --innodb-force-recovery=6 --innodb-change-buffer-dump
check table t1;
Table Op Msg_type Msg_text
test.t1 check Warning InnoDB: Index 'b' contains 990 entries, should be 1024.
test.t1 check error Corrupt
# restart: --innodb-force_recovery=0
SET GLOBAL innodb_fast_shutdown=0;
# restart: --innodb-force_recovery=0
DROP TABLE t1;

View File

@@ -1,51 +0,0 @@
#
# Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE
# OPERATION IF IT IS DONE IN-PLACE
#
call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery");
call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc");
call mtr.add_suppression("Plugin 'InnoDB'");
FLUSH TABLES;
CREATE TABLE t1(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(1),
c INT,
INDEX(b))
ENGINE=InnoDB STATS_PERSISTENT=0;
SET GLOBAL innodb_change_buffering_debug = 1;
SET GLOBAL innodb_change_buffering = all;
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192;
BEGIN;
SELECT b FROM t1 LIMIT 3;
b
x
x
x
connect con1,localhost,root,,;
BEGIN;
DELETE FROM t1 WHERE a=1;
INSERT INTO t1 VALUES(1,'X',1);
SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
SELECT b FROM t1 LIMIT 3;
ERROR HY000: Lost connection to server during query
disconnect con1;
connection default;
FOUND 1 /Wrote log record for ibuf update in place operation/ in mysqld.1.err
# restart: --innodb-read-only
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check Error Unknown storage engine 'InnoDB'
test.t1 check error Corrupt
FOUND 1 /innodb_read_only prevents crash recovery/ in mysqld.1.err
# restart: --innodb-force-recovery=5 --debug-dbug=d,crash_after_log_ibuf_upd_inplace
SELECT * FROM t1 LIMIT 1;
a b c
1 X 1
SET GLOBAL innodb_fast_shutdown=0;
# restart
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
DROP TABLE t1;

View File

@@ -454,11 +454,6 @@ ALTER TABLE t1 IMPORT TABLESPACE;
ERROR HY000: Got error 42 'Tablespace not found' from ./test/t1.ibd
SET SESSION debug_dbug=@saved_debug_dbug;
restore: t1 .ibd and .cfg files
SET SESSION debug_dbug="+d,ib_import_check_bitmap_failure";
ALTER TABLE t1 IMPORT TABLESPACE;
ERROR HY000: Index for table 't1' is corrupt; try to repair it
SET SESSION debug_dbug=@saved_debug_dbug;
restore: t1 .ibd and .cfg files
SET SESSION debug_dbug="+d,ib_import_cluster_root_adjust_failure";
ALTER TABLE t1 IMPORT TABLESPACE;
ERROR HY000: Index for table 't1' is corrupt; try to repair it

View File

@@ -1,18 +0,0 @@
CREATE TABLE bug59733(a INT AUTO_INCREMENT PRIMARY KEY,b CHAR(1))ENGINE=InnoDB;
INSERT INTO bug59733 VALUES(0,'x');
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
CREATE INDEX b ON bug59733 (b);
DELETE FROM bug59733 WHERE (a%100)=0;
DROP INDEX b ON bug59733;
CREATE INDEX b ON bug59733 (b);
DROP TABLE bug59733;

View File

@@ -116,12 +116,8 @@ buffer_LRU_unzip_search_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL N
buffer_LRU_unzip_search_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Page scanned per single LRU unzip search
buffer_page_read_index_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Leaf Pages read
buffer_page_read_index_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Non-leaf Pages read
buffer_page_read_index_ibuf_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Leaf Pages read
buffer_page_read_index_ibuf_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Non-Leaf Pages read
buffer_page_read_undo_log buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Undo Log Pages read
buffer_page_read_index_inode buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Inode Pages read
buffer_page_read_ibuf_free_list buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Free List Pages read
buffer_page_read_ibuf_bitmap buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Bitmap Pages read
buffer_page_read_system_page buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of System Pages read
buffer_page_read_trx_system buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Transaction System Pages read
buffer_page_read_fsp_hdr buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of File Space Header Pages read
@@ -132,12 +128,8 @@ buffer_page_read_zblob2 buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NU
buffer_page_read_other buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of other/unknown (old version of InnoDB) Pages read
buffer_page_written_index_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Leaf Pages written
buffer_page_written_index_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Non-leaf Pages written
buffer_page_written_index_ibuf_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Leaf Pages written
buffer_page_written_index_ibuf_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Non-Leaf Pages written
buffer_page_written_undo_log buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Undo Log Pages written
buffer_page_written_index_inode buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Inode Pages written
buffer_page_written_ibuf_free_list buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Free List Pages written
buffer_page_written_ibuf_bitmap buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Bitmap Pages written
buffer_page_written_system_page buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of System Pages written
buffer_page_written_trx_system buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Transaction System Pages written
buffer_page_written_fsp_hdr buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of File Space Header Pages written
@@ -205,14 +197,6 @@ adaptive_hash_rows_removed adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL
adaptive_hash_rows_deleted_no_hash_entry adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of rows deleted that did not have corresponding Adaptive Hash Index entries
adaptive_hash_rows_updated adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Adaptive Hash Index rows updated
file_num_open_files file_system 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Number of files currently open (innodb_num_open_files)
ibuf_merges_insert change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of inserted records merged by change buffering
ibuf_merges_delete_mark change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of deleted records merged by change buffering
ibuf_merges_delete change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of purge records merged by change buffering
ibuf_merges_discard_insert change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of insert merged operations discarded
ibuf_merges_discard_delete_mark change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of deleted merged operations discarded
ibuf_merges_discard_delete change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of purge merged operations discarded
ibuf_merges change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of change buffer merges
ibuf_size change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Change buffer size in pages
innodb_master_thread_sleeps server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of times (seconds) master thread sleeps
innodb_activity_count server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Current server activity count
innodb_master_active_loops server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of times master thread performs its tasks when server is active

View File

@@ -44,16 +44,6 @@ INNODB_DBLWR_PAGES_WRITTEN
INNODB_DBLWR_WRITES
INNODB_DEADLOCKS
INNODB_HISTORY_LIST_LENGTH
INNODB_IBUF_DISCARDED_DELETE_MARKS
INNODB_IBUF_DISCARDED_DELETES
INNODB_IBUF_DISCARDED_INSERTS
INNODB_IBUF_FREE_LIST
INNODB_IBUF_MERGED_DELETE_MARKS
INNODB_IBUF_MERGED_DELETES
INNODB_IBUF_MERGED_INSERTS
INNODB_IBUF_MERGES
INNODB_IBUF_SEGMENT_SIZE
INNODB_IBUF_SIZE
INNODB_LOG_WAITS
INNODB_LOG_WRITE_REQUESTS
INNODB_LOG_WRITES

View File

@@ -2,12 +2,10 @@
# Bug#19904003 INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG=1
# CAUSES INFINITE PAGE SPLIT
#
SET GLOBAL innodb_change_buffering_debug=1;
SET GLOBAL innodb_limit_optimistic_insert_debug=1;
CREATE TABLE t1(c1 INT PRIMARY KEY) ENGINE=InnoDB
PARTITION BY HASH (c1) PARTITIONS 15;
DROP TABLE t1;
SET GLOBAL innodb_change_buffering_debug=0;
SET GLOBAL innodb_limit_optimistic_insert_debug=0;
#
# Bug#25082593 FOREIGN KEY VALIDATION DOESN'T NEED

View File

@@ -1,24 +0,0 @@
CREATE TABLE t1(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(255),
INDEX(b))
ENGINE=InnoDB;
INSERT INTO t1(b) SELECT UUID();
BEGIN;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
INSERT INTO t1(b) SELECT UUID() FROM t1;
COMMIT;
UPDATE t1 SET b=UUID();
DROP TABLE t1;

View File

@@ -82,12 +82,8 @@ buffer_LRU_unzip_search_num_scan disabled
buffer_LRU_unzip_search_scanned_per_call disabled
buffer_page_read_index_leaf disabled
buffer_page_read_index_non_leaf disabled
buffer_page_read_index_ibuf_leaf disabled
buffer_page_read_index_ibuf_non_leaf disabled
buffer_page_read_undo_log disabled
buffer_page_read_index_inode disabled
buffer_page_read_ibuf_free_list disabled
buffer_page_read_ibuf_bitmap disabled
buffer_page_read_system_page disabled
buffer_page_read_trx_system disabled
buffer_page_read_fsp_hdr disabled
@@ -98,12 +94,8 @@ buffer_page_read_zblob2 disabled
buffer_page_read_other disabled
buffer_page_written_index_leaf disabled
buffer_page_written_index_non_leaf disabled
buffer_page_written_index_ibuf_leaf disabled
buffer_page_written_index_ibuf_non_leaf disabled
buffer_page_written_undo_log disabled
buffer_page_written_index_inode disabled
buffer_page_written_ibuf_free_list disabled
buffer_page_written_ibuf_bitmap disabled
buffer_page_written_system_page disabled
buffer_page_written_trx_system disabled
buffer_page_written_fsp_hdr disabled
@@ -171,14 +163,6 @@ adaptive_hash_rows_removed disabled
adaptive_hash_rows_deleted_no_hash_entry disabled
adaptive_hash_rows_updated disabled
file_num_open_files disabled
ibuf_merges_insert disabled
ibuf_merges_delete_mark disabled
ibuf_merges_delete disabled
ibuf_merges_discard_insert disabled
ibuf_merges_discard_delete_mark disabled
ibuf_merges_discard_delete disabled
ibuf_merges disabled
ibuf_size disabled
innodb_master_thread_sleeps disabled
innodb_activity_count disabled
innodb_master_active_loops disabled

View File

@@ -25,7 +25,6 @@ c varchar(150), index k1(c(99), b(56)), index k2(b(5), c(10))) engine=InnoDB
row_format=redundant;
insert into t3 values(444, 'dddd', 'bbbbb', 'aaaaa');
insert into t3 values(555, 'eeee', 'ccccc', 'aaaaa');
SET GLOBAL innodb_fast_shutdown=0;
# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/row_format_redundant --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/row_format_redundant --innodb-data-file-path=ibdata1:1M:autoextend --innodb-undo-tablespaces=0 --innodb-stats-persistent=0 --innodb-read-only
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -68,7 +67,7 @@ DROP TABLE t1;
Warnings:
Warning 1932 Table 'test.t1' doesn't exist in engine
DROP TABLE t2,t3;
FOUND 6 /\[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b/ in mysqld.1.err
FOUND 1 /\[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b.*/ in mysqld.1.err
# restart
ib_buffer_pool
ib_logfile0

View File

@@ -1,67 +0,0 @@
--source include/have_innodb.inc
--source include/have_sequence.inc
SET @buffering= @@innodb_change_buffering;
SET GLOBAL innodb_change_buffering= deletes;
SET @flush= @@innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit= 0;
CREATE TABLE t1 (
a varchar(1024),
b varchar(1024),
c varchar(1024),
d varchar(1024),
e varchar(1024),
f varchar(1024),
g varchar(1024),
h varchar(1024),
key (a),
key (b),
key (c),
key (d)
) ENGINE=InnoDB;
INSERT INTO t1
SELECT REPEAT('x',10), REPEAT('x',13), REPEAT('x',427), REPEAT('x',244),
REPEAT('x',9), REPEAT('x',112), REPEAT('x',814), REPEAT('x',633)
FROM seq_1_to_1024;
CREATE TEMPORARY TABLE t2 (
a varchar(1024),
b varchar(1024),
c varchar(1024),
d varchar(1024),
e varchar(1024),
f varchar(1024),
g varchar(1024),
h varchar(1024),
i varchar(1024),
j varchar(1024),
k varchar(1024),
l varchar(1024),
m varchar(1024),
key (a),
key (b),
key (c),
key (d),
key (e),
key (f)
) ENGINE=InnoDB;
SET @x=REPEAT('x',512);
INSERT INTO t2 SELECT @x, @x, @x, @x, @x, @x, @x, @x, @x, @x, @x, @x, @x
FROM seq_1_to_768;
--disable_query_log
--let $run=1024
while ($run)
{
eval DELETE FROM t1 LIMIT 1 /* $run */;
--dec $run
}
--enable_query_log
# Cleanup
DROP TABLE t1, t2;
SET GLOBAL innodb_change_buffering= @buffering;
SET GLOBAL innodb_flush_log_at_trx_commit= @flush;

View File

@@ -1,9 +0,0 @@
[strict_crc32]
--innodb-checksum-algorithm=strict_crc32
--innodb-page-size=4k
--innodb-force-recovery=2
[strict_full_crc32]
--innodb-checksum-algorithm=strict_full_crc32
--innodb-page-size=4k
--innodb-force-recovery=2

View File

@@ -1,117 +0,0 @@
--source include/have_innodb.inc
--source include/no_valgrind_without_big.inc
# innodb_change_buffering_debug option is debug only
--source include/have_debug.inc
# Embedded server tests do not support restarting
--source include/not_embedded.inc
--source include/have_sequence.inc
--disable_query_log
call mtr.add_suppression("InnoDB: Failed to find tablespace for table `test`\\.`t1` in the cache\\. Attempting to load the tablespace with space id");
call mtr.add_suppression("InnoDB: Allocated tablespace ID \\d+ for test.t1, old maximum was");
call mtr.add_suppression("InnoDB: Failed to find tablespace for table `mysql`\\.`transaction_registry` in the cache\\. Attempting to load the tablespace with space id");
call mtr.add_suppression("InnoDB: Allocated tablespace ID \\d+ for mysql.transaction_registry, old maximum was");
call mtr.add_suppression("InnoDB: Trying to read 4096 bytes");
call mtr.add_suppression("InnoDB: File './test/t1.ibd' is corrupted");
--enable_query_log
CREATE TABLE t1(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(1),
c INT,
INDEX(b))
ENGINE=InnoDB STATS_PERSISTENT=0;
# The flag innodb_change_buffering_debug is only available in debug builds.
# It instructs InnoDB to try to evict pages from the buffer pool when
# change buffering is possible, so that the change buffer will be used
# whenever possible.
SET GLOBAL innodb_change_buffering_debug = 1;
SET GLOBAL innodb_change_buffering=all;
# Create enough rows for the table, so that the change buffer will be
# used for modifying the secondary index page. There must be multiple
# index pages, because changes to the root page are never buffered.
INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_1024;
let MYSQLD_DATADIR=`select @@datadir`;
let PAGE_SIZE=`select @@innodb_page_size`;
--source include/shutdown_mysqld.inc
# Corrupt the change buffer bitmap, to claim that pages are clean
perl;
do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
open(FILE, "+<$file") || die "Unable to open $file";
binmode FILE;
my $ps= $ENV{PAGE_SIZE};
my $page;
die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
my $full_crc32 = unpack("N",substr($page,54,4)) & 0x10; # FIL_SPACE_FLAGS
die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
# Clean the change buffer bitmap.
substr($page,38,$ps - 38 - 8) = chr(0) x ($ps - 38 - 8);
my $polynomial = 0x82f63b78; # CRC-32C
if ($full_crc32)
{
my $ck = mycrc32(substr($page, 0, $ps-4), 0, $polynomial);
substr($page, $ps-4, 4) = pack("N", $ck);
}
else
{
my $ck= pack("N",mycrc32(substr($page, 4, 22), 0, $polynomial) ^
mycrc32(substr($page, 38, $ps - 38 - 8), 0, $polynomial));
substr($page,0,4)=$ck;
substr($page,$ps-8,4)=$ck;
}
sysseek(FILE, $ps, 0) || die "Unable to rewind $file\n";
syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
close(FILE) || die "Unable to close $file";
EOF
--let $restart_parameters= --innodb-force-recovery=6 --innodb-change-buffer-dump
--source include/start_mysqld.inc
--replace_regex /contains \d+ entries/contains 990 entries/
check table t1;
--source include/shutdown_mysqld.inc
# Truncate the file to 5 pages, as if it were empty
perl;
do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
open(FILE, "+<$file") || die "Unable to open $file";
binmode FILE;
my $ps= $ENV{PAGE_SIZE};
my $pages=5;
my $page;
die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
my $full_crc32 = unpack("N",substr($page,54,4)) & 0x10; # FIL_SPACE_FLAGS
substr($page,46,4)=pack("N", $pages);
my $polynomial = 0x82f63b78; # CRC-32C
if ($full_crc32)
{
my $ck = mycrc32(substr($page, 0, $ps-4), 0, $polynomial);
substr($page, $ps-4, 4) = pack("N", $ck);
}
else
{
my $ck= pack("N",mycrc32(substr($page, 4, 22), 0, $polynomial) ^
mycrc32(substr($page, 38, $ps - 38 - 8), 0, $polynomial));
substr($page,0,4)=$ck;
substr($page,$ps-8,4)=$ck;
}
sysseek(FILE, 0, 0) || die "Unable to rewind $file\n";
syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
truncate(FILE, $ps * $pages);
close(FILE) || die "Unable to close $file";
EOF
--let $restart_parameters=--innodb-force_recovery=0
--source include/start_mysqld.inc
SET GLOBAL innodb_fast_shutdown=0;
--source include/restart_mysqld.inc
# Cleanup
DROP TABLE t1;

View File

@@ -1 +0,0 @@
--innodb_buffer_pool_size=24M

View File

@@ -1,78 +0,0 @@
--echo #
--echo # Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE
--echo # OPERATION IF IT IS DONE IN-PLACE
--echo #
--source include/have_innodb.inc
# innodb_change_buffering_debug option is debug only
--source include/have_debug.inc
# Embedded server does not support crashing
--source include/not_embedded.inc
# DBUG_SUICIDE() hangs under valgrind
--source include/not_valgrind.inc
# This test is slow on buildbot.
--source include/big_test.inc
--source include/have_sequence.inc
call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery");
call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc");
call mtr.add_suppression("Plugin 'InnoDB'");
FLUSH TABLES;
CREATE TABLE t1(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(1),
c INT,
INDEX(b))
ENGINE=InnoDB STATS_PERSISTENT=0;
--let $_server_id= `SELECT @@server_id`
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
# The flag innodb_change_buffering_debug is only available in debug builds.
# It instructs InnoDB to try to evict pages from the buffer pool when
# change buffering is possible, so that the change buffer will be used
# whenever possible.
SET GLOBAL innodb_change_buffering_debug = 1;
SET GLOBAL innodb_change_buffering = all;
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
# Create enough rows for the table, so that the change buffer will be
# used for modifying the secondary index page. There must be multiple
# index pages, because changes to the root page are never buffered.
INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192;
BEGIN;
SELECT b FROM t1 LIMIT 3;
connect (con1,localhost,root,,);
BEGIN;
DELETE FROM t1 WHERE a=1;
# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
INSERT INTO t1 VALUES(1,'X',1);
SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
--exec echo "wait" > $_expect_file_name
--error 2013
# This should force a change buffer merge
SELECT b FROM t1 LIMIT 3;
disconnect con1;
connection default;
let SEARCH_PATTERN=Wrote log record for ibuf update in place operation;
--source include/search_pattern_in_file.inc
--let $restart_parameters= --innodb-read-only
--source include/start_mysqld.inc
CHECK TABLE t1;
--source include/shutdown_mysqld.inc
let SEARCH_PATTERN=innodb_read_only prevents crash recovery;
--source include/search_pattern_in_file.inc
--let $restart_parameters= --innodb-force-recovery=5 --debug-dbug=d,crash_after_log_ibuf_upd_inplace
--source include/start_mysqld.inc
SELECT * FROM t1 LIMIT 1;
# Slow shutdown will not merge the changes due to innodb_force_recovery=5.
SET GLOBAL innodb_fast_shutdown=0;
--let $restart_parameters=
--source include/restart_mysqld.inc
CHECK TABLE t1;
DROP TABLE t1;

View File

@@ -952,21 +952,6 @@ do "$ENV{MTR_SUITE_DIR}/include/innodb-util.pl";
ib_restore_tablespaces("test", "t1");
EOF
# Test failure after ibuf check
SET SESSION debug_dbug="+d,ib_import_check_bitmap_failure";
# Need proper mapping of error codes :-(
--error ER_NOT_KEYFILE
ALTER TABLE t1 IMPORT TABLESPACE;
SET SESSION debug_dbug=@saved_debug_dbug;
# Restore files
perl;
do "$ENV{MTR_SUITE_DIR}/include/innodb-util.pl";
ib_restore_tablespaces("test", "t1");
EOF
# Test failure after adjusting the cluster index root page
SET SESSION debug_dbug="+d,ib_import_cluster_root_adjust_failure";

View File

@@ -69,7 +69,7 @@ EOF
--move_file $file $file.now
# Complete purge (and change buffer merge).
# Complete purge.
SET GLOBAL innodb_fast_shutdown=0;
--source include/shutdown_mysqld.inc

View File

@@ -1,53 +0,0 @@
#
# Bug #59733 Possible deadlock when buffered changes are to be discarded
# in buf_page_create
#
-- source include/have_innodb.inc
-- disable_query_log
# The flag innodb_change_buffering_debug is only available in debug builds.
# It instructs InnoDB to try to evict pages from the buffer pool when
# change buffering is possible, so that the change buffer will be used
# whenever possible.
-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug;
-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_change_buffering_debug = 1;
-- enable_query_log
CREATE TABLE bug59733(a INT AUTO_INCREMENT PRIMARY KEY,b CHAR(1))ENGINE=InnoDB;
# Create enough rows for the table, so that the insert buffer will be
# used. There must be multiple index pages, because changes to the
# root page are never buffered.
INSERT INTO bug59733 VALUES(0,'x');
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
INSERT INTO bug59733 SELECT 0,b FROM bug59733;
# Create the secondary index for which changes will be buffered.
CREATE INDEX b ON bug59733 (b);
# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
DELETE FROM bug59733 WHERE (a%100)=0;
# Drop the index in order to get free pages with orphaned buffered changes.
DROP INDEX b ON bug59733;
# Create the index and attempt to reuse pages for which buffered changes exist.
CREATE INDEX b ON bug59733 (b);
DROP TABLE bug59733;
-- disable_query_log
-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_change_buffering_debug = @innodb_change_buffering_debug_orig;

View File

@@ -7,13 +7,11 @@
--echo # CAUSES INFINITE PAGE SPLIT
--echo #
SET GLOBAL innodb_change_buffering_debug=1;
SET GLOBAL innodb_limit_optimistic_insert_debug=1;
CREATE TABLE t1(c1 INT PRIMARY KEY) ENGINE=InnoDB
PARTITION BY HASH (c1) PARTITIONS 15;
DROP TABLE t1;
SET GLOBAL innodb_change_buffering_debug=0;
SET GLOBAL innodb_limit_optimistic_insert_debug=0;
--echo #

View File

@@ -57,16 +57,24 @@ my $head = pack("Nx[18]", 0);
my $body = pack("x[8]Nx[10]Nx[16312]", 768, 97937874);
my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[12]").$body.pack("Nx[4]",$ck);
# Dummy pages 1..6.
$body = pack("x[16338]");
for (my($page) = 1; $page < 7; $page++)
{
## FIL_PAGE_OFFSET
$head = pack("Nx[18]", $page);
$ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[16350]Nx[4]",$ck);
}
# Dummy change buffer header page (page 3).
die unless seek(OUT, 3 * 16384, 0);
## FIL_PAGE_OFFSET, FIL_PAGE_PREV, FIL_PAGE_NEXT, FIL_PAGE_TYPE
my $head = pack("NNNx[8]n", 3, 0xffffffff, 0xffffffff, 6);
my $body = pack("x[62]nnx[16272]", 2, 50);
my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[12]").$body.pack("Nx[4]",$ck);
# Dummy change buffer root page (page 4).
## FIL_PAGE_OFFSET, FIL_PAGE_PREV, FIL_PAGE_NEXT
my $head = pack("NNNx[10]", 4, 0xffffffff, 0xffffffff);
my $body = chr(0) x 16338;
my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[12]").$body.pack("Nx[4]",$ck);
# Dictionary header page (page 7).
die unless seek(OUT, 7 * 16384, 0);
## FIL_PAGE_OFFSET
$head = pack("Nx[18]", 7);
## DICT_HDR_TABLES,DICT_HDR_INDEXES

View File

@@ -45,9 +45,24 @@ my $head = pack("Nx[18]", 0);
my $body = pack("x[8]Nx[10]Nx[16312]", 768, 97937874);
my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[12]").$body.pack("Nx[4]",$ck);
# Dummy pages 1..6.
print OUT chr(0) x (6 * 16384);
# Dummy change buffer header page (page 3).
die unless seek(OUT, 3 * 16384, 0);
## FIL_PAGE_OFFSET, FIL_PAGE_PREV, FIL_PAGE_NEXT, FIL_PAGE_TYPE
my $head = pack("NNNx[8]n", 3, 0xffffffff, 0xffffffff, 6);
my $body = pack("x[62]nnx[16272]", 2, 50);
my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[12]").$body.pack("Nx[4]",$ck);
# Dummy change buffer root page (page 4).
## FIL_PAGE_OFFSET, FIL_PAGE_PREV, FIL_PAGE_NEXT
my $head = pack("NNNx[10]", 4, 0xffffffff, 0xffffffff);
my $body = chr(0) x 16338;
my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial);
print OUT pack("N",$ck).$head.pack("x[12]").$body.pack("Nx[4]",$ck);
# Dictionary header page (page 7).
die unless seek(OUT, 7 * 16384, 0);
## FIL_PAGE_OFFSET
$head = pack("Nx[18]", 7);
## DICT_HDR_TABLES,DICT_HDR_INDEXES

View File

@@ -1 +0,0 @@
--innodb --innodb-buffer-pool-size=5MB --innodb-read-io-threads=1 --innodb-doublewrite=0 --innodb-flush-log-at-trx-commit=0

View File

@@ -1,30 +0,0 @@
--source include/windows.inc
# This test is slow on buildbot.
--source include/big_test.inc
# Deadlock in conjunction with InnoDB change buffering.
# When InnoDB change buffering kicks in, i.e. a secondary non-unique index
# does not fit into the buffer pool, then, on Windows, InnoDB
# background threads could deadlock whenever an index page is
# read and the page needs to load/merge the change buffer.
# The test tries to reproduce this situation by creating an index
# that does not fit into the buffer pool, and doing a large update.
CREATE TABLE t1(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(255),
INDEX(b))
ENGINE=InnoDB;
INSERT INTO t1(b) SELECT UUID();
BEGIN;
let $i=`select cast(log2(@@innodb_buffer_pool_size/255) as int)`;
while ($i)
{
INSERT INTO t1(b) SELECT UUID() FROM t1;
dec $i;
}
COMMIT;
UPDATE t1 SET b=UUID();
DROP TABLE t1;

View File

@@ -58,9 +58,6 @@ row_format=redundant;
insert into t3 values(444, 'dddd', 'bbbbb', 'aaaaa');
insert into t3 values(555, 'eeee', 'ccccc', 'aaaaa');
# read-only restart requires the change buffer to be empty; therefore we
# do a slow shutdown.
SET GLOBAL innodb_fast_shutdown=0;
--let $restart_parameters= $d --innodb-read-only
--source include/restart_mysqld.inc
@@ -147,7 +144,7 @@ RENAME TABLE t1 TO tee_one;
DROP TABLE t1;
DROP TABLE t2,t3;
--let SEARCH_PATTERN= \[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b
--let SEARCH_PATTERN= \[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b.*
--source include/search_pattern_in_file.inc
--let $restart_parameters=

View File

@@ -94,11 +94,6 @@ ALTER TABLE t1 IMPORT TABLESPACE;
ERROR HY000: Got error 42 'Tablespace not found' from ./test/t1.ibd
SET SESSION debug_dbug=@saved_debug_dbug;
restore: t1 .ibd and .cfg files
SET SESSION debug_dbug="+d,ib_import_check_bitmap_failure";
ALTER TABLE t1 IMPORT TABLESPACE;
ERROR HY000: Index for table 't1' is corrupt; try to repair it
SET SESSION debug_dbug=@saved_debug_dbug;
restore: t1 .ibd and .cfg files
SET SESSION debug_dbug="+d,ib_import_cluster_root_adjust_failure";
ALTER TABLE t1 IMPORT TABLESPACE;
ERROR HY000: Index for table 't1' is corrupt; try to repair it

View File

@@ -226,21 +226,6 @@ do "$ENV{MTR_SUITE_DIR}/../innodb/include/innodb-util.pl";
ib_restore_tablespaces("test", "t1");
EOF
# Test failure after ibuf check
SET SESSION debug_dbug="+d,ib_import_check_bitmap_failure";
# Need proper mapping of error codes :-(
--error ER_NOT_KEYFILE
ALTER TABLE t1 IMPORT TABLESPACE;
SET SESSION debug_dbug=@saved_debug_dbug;
# Restore files
perl;
do "$ENV{MTR_SUITE_DIR}/../innodb/include/innodb-util.pl";
ib_restore_tablespaces("test", "t1");
EOF
# Test failure after adjusting the cluster index root page
SET SESSION debug_dbug="+d,ib_import_cluster_root_adjust_failure";

View File

@@ -1,77 +0,0 @@
SET @start_global_value = @@global.innodb_change_buffer_max_size;
SELECT @start_global_value;
@start_global_value
25
Valid values are between 0 and 50
select @@global.innodb_change_buffer_max_size between 0 and 50;
@@global.innodb_change_buffer_max_size between 0 and 50
1
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
25
select @@session.innodb_change_buffer_max_size;
ERROR HY000: Variable 'innodb_change_buffer_max_size' is a GLOBAL variable
show global variables like 'innodb_change_buffer_max_size';
Variable_name Value
innodb_change_buffer_max_size 25
show session variables like 'innodb_change_buffer_max_size';
Variable_name Value
innodb_change_buffer_max_size 25
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFER_MAX_SIZE 25
select * from information_schema.session_variables where variable_name='innodb_change_buffer_max_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFER_MAX_SIZE 25
set global innodb_change_buffer_max_size=10;
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
10
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFER_MAX_SIZE 10
select * from information_schema.session_variables where variable_name='innodb_change_buffer_max_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFER_MAX_SIZE 10
set session innodb_change_buffer_max_size=1;
ERROR HY000: Variable 'innodb_change_buffer_max_size' is a GLOBAL variable and should be set with SET GLOBAL
set global innodb_change_buffer_max_size=1.1;
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffer_max_size'
set global innodb_change_buffer_max_size=1e1;
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffer_max_size'
set global innodb_change_buffer_max_size="foo";
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffer_max_size'
set global innodb_change_buffer_max_size=-7;
Warnings:
Warning 1292 Truncated incorrect innodb_change_buffer_max_size value: '-7'
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
0
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFER_MAX_SIZE 0
set global innodb_change_buffer_max_size=56;
Warnings:
Warning 1292 Truncated incorrect innodb_change_buffer_max_size value: '56'
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
50
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFER_MAX_SIZE 50
set global innodb_change_buffer_max_size=0;
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
0
set global innodb_change_buffer_max_size=50;
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
50
set global innodb_change_buffer_max_size=DEFAULT;
select @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
25
SET @@global.innodb_change_buffer_max_size = @start_global_value;
SELECT @@global.innodb_change_buffer_max_size;
@@global.innodb_change_buffer_max_size
25

View File

@@ -1,73 +0,0 @@
SET @start_global_value = @@global.innodb_change_buffering;
SELECT @start_global_value;
@start_global_value
none
Valid values are 'all', 'deletes', 'changes', 'inserts', 'none', 'purges'
select @@global.innodb_change_buffering in ('all', 'deletes', 'changes', 'inserts', 'none', 'purges');
@@global.innodb_change_buffering in ('all', 'deletes', 'changes', 'inserts', 'none', 'purges')
1
select @@global.innodb_change_buffering;
@@global.innodb_change_buffering
none
select @@session.innodb_change_buffering;
ERROR HY000: Variable 'innodb_change_buffering' is a GLOBAL variable
show global variables like 'innodb_change_buffering';
Variable_name Value
innodb_change_buffering none
show session variables like 'innodb_change_buffering';
Variable_name Value
innodb_change_buffering none
select * from information_schema.global_variables where variable_name='innodb_change_buffering';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING none
select * from information_schema.session_variables where variable_name='innodb_change_buffering';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING none
set global innodb_change_buffering='none';
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
select @@global.innodb_change_buffering;
@@global.innodb_change_buffering
none
select * from information_schema.global_variables where variable_name='innodb_change_buffering';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING none
select * from information_schema.session_variables where variable_name='innodb_change_buffering';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING none
set @@global.innodb_change_buffering='inserts';
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
select @@global.innodb_change_buffering;
@@global.innodb_change_buffering
inserts
select * from information_schema.global_variables where variable_name='innodb_change_buffering';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING inserts
select * from information_schema.session_variables where variable_name='innodb_change_buffering';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING inserts
set session innodb_change_buffering='some';
ERROR HY000: Variable 'innodb_change_buffering' is a GLOBAL variable and should be set with SET GLOBAL
set @@session.innodb_change_buffering='some';
ERROR HY000: Variable 'innodb_change_buffering' is a GLOBAL variable and should be set with SET GLOBAL
set global innodb_change_buffering=1.1;
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffering'
set global innodb_change_buffering=1;
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
SELECT @@global.innodb_change_buffering;
@@global.innodb_change_buffering
inserts
set global innodb_change_buffering=-2;
ERROR 42000: Variable 'innodb_change_buffering' can't be set to the value of '-2'
set global innodb_change_buffering=1e1;
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffering'
set global innodb_change_buffering='some';
ERROR 42000: Variable 'innodb_change_buffering' can't be set to the value of 'some'
SET @@global.innodb_change_buffering = @start_global_value;
Warnings:
Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
SELECT @@global.innodb_change_buffering;
@@global.innodb_change_buffering
none

View File

@@ -1,67 +0,0 @@
SET @start_global_value = @@global.innodb_change_buffering_debug;
SELECT @start_global_value;
@start_global_value
0
select @@global.innodb_change_buffering_debug in (0, 1);
@@global.innodb_change_buffering_debug in (0, 1)
1
select @@global.innodb_change_buffering_debug;
@@global.innodb_change_buffering_debug
0
select @@session.innodb_change_buffering_debug;
ERROR HY000: Variable 'innodb_change_buffering_debug' is a GLOBAL variable
show global variables like 'innodb_change_buffering_debug';
Variable_name Value
innodb_change_buffering_debug 0
show session variables like 'innodb_change_buffering_debug';
Variable_name Value
innodb_change_buffering_debug 0
select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING_DEBUG 0
select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING_DEBUG 0
set global innodb_change_buffering_debug=1;
select @@global.innodb_change_buffering_debug;
@@global.innodb_change_buffering_debug
1
select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING_DEBUG 1
select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING_DEBUG 1
set @@global.innodb_change_buffering_debug=0;
select @@global.innodb_change_buffering_debug;
@@global.innodb_change_buffering_debug
0
select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING_DEBUG 0
select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug';
VARIABLE_NAME VARIABLE_VALUE
INNODB_CHANGE_BUFFERING_DEBUG 0
set session innodb_change_buffering_debug='some';
ERROR HY000: Variable 'innodb_change_buffering_debug' is a GLOBAL variable and should be set with SET GLOBAL
set @@session.innodb_change_buffering_debug='some';
ERROR HY000: Variable 'innodb_change_buffering_debug' is a GLOBAL variable and should be set with SET GLOBAL
set global innodb_change_buffering_debug=1.1;
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffering_debug'
set global innodb_change_buffering_debug='foo';
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffering_debug'
set global innodb_change_buffering_debug=-2;
Warnings:
Warning 1292 Truncated incorrect innodb_change_buffering_debug value: '-2'
set global innodb_change_buffering_debug=1e1;
ERROR 42000: Incorrect argument type to variable 'innodb_change_buffering_debug'
set global innodb_change_buffering_debug=2;
Warnings:
Warning 1292 Truncated incorrect innodb_change_buffering_debug value: '2'
select @@global.innodb_change_buffering_debug;
@@global.innodb_change_buffering_debug
1
SET @@global.innodb_change_buffering_debug = @start_global_value;
SELECT @@global.innodb_change_buffering_debug;
@@global.innodb_change_buffering_debug
0

View File

@@ -223,54 +223,6 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST OFF,ON
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_CHANGE_BUFFERING
SESSION_VALUE NULL
DEFAULT_VALUE none
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE ENUM
VARIABLE_COMMENT Buffer changes to secondary indexes.
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST none,inserts,deletes,changes,purges,all
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_CHANGE_BUFFERING_DEBUG
SESSION_VALUE NULL
DEFAULT_VALUE 0
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT Debug flags for InnoDB change buffering (0=none, 1=try to buffer)
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 1
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_CHANGE_BUFFER_DUMP
SESSION_VALUE NULL
DEFAULT_VALUE OFF
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BOOLEAN
VARIABLE_COMMENT Dump the change buffer at startup.
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST OFF,ON
READ_ONLY YES
COMMAND_LINE_ARGUMENT NONE
VARIABLE_NAME INNODB_CHANGE_BUFFER_MAX_SIZE
SESSION_VALUE NULL
DEFAULT_VALUE 25
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT Maximum on-disk size of change buffer in terms of percentage of the buffer pool.
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 50
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_CHECKSUM_ALGORITHM
SESSION_VALUE NULL
DEFAULT_VALUE full_crc32

View File

@@ -1,71 +0,0 @@
# 2011-02-09 - Added
#
--source include/have_innodb.inc
SET @start_global_value = @@global.innodb_change_buffer_max_size;
SELECT @start_global_value;
#
# exists as global only
#
--echo Valid values are between 0 and 50
select @@global.innodb_change_buffer_max_size between 0 and 50;
select @@global.innodb_change_buffer_max_size;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_change_buffer_max_size;
show global variables like 'innodb_change_buffer_max_size';
show session variables like 'innodb_change_buffer_max_size';
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
select * from information_schema.session_variables where variable_name='innodb_change_buffer_max_size';
--enable_warnings
#
# show that it's writable
#
set global innodb_change_buffer_max_size=10;
select @@global.innodb_change_buffer_max_size;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
select * from information_schema.session_variables where variable_name='innodb_change_buffer_max_size';
--enable_warnings
--error ER_GLOBAL_VARIABLE
set session innodb_change_buffer_max_size=1;
#
# incorrect types
#
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffer_max_size=1.1;
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffer_max_size=1e1;
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffer_max_size="foo";
set global innodb_change_buffer_max_size=-7;
select @@global.innodb_change_buffer_max_size;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
--enable_warnings
set global innodb_change_buffer_max_size=56;
select @@global.innodb_change_buffer_max_size;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size';
--enable_warnings
#
# min/max/DEFAULT values
#
set global innodb_change_buffer_max_size=0;
select @@global.innodb_change_buffer_max_size;
set global innodb_change_buffer_max_size=50;
select @@global.innodb_change_buffer_max_size;
set global innodb_change_buffer_max_size=DEFAULT;
select @@global.innodb_change_buffer_max_size;
SET @@global.innodb_change_buffer_max_size = @start_global_value;
SELECT @@global.innodb_change_buffer_max_size;

View File

@@ -1,65 +0,0 @@
# 2010-01-25 - Added
#
--source include/have_innodb.inc
SET @start_global_value = @@global.innodb_change_buffering;
SELECT @start_global_value;
#
# exists as global only
#
--echo Valid values are 'all', 'deletes', 'changes', 'inserts', 'none', 'purges'
select @@global.innodb_change_buffering in ('all', 'deletes', 'changes', 'inserts', 'none', 'purges');
select @@global.innodb_change_buffering;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_change_buffering;
show global variables like 'innodb_change_buffering';
show session variables like 'innodb_change_buffering';
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffering';
select * from information_schema.session_variables where variable_name='innodb_change_buffering';
--enable_warnings
#
# show that it's writable
#
set global innodb_change_buffering='none';
select @@global.innodb_change_buffering;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffering';
select * from information_schema.session_variables where variable_name='innodb_change_buffering';
--enable_warnings
set @@global.innodb_change_buffering='inserts';
select @@global.innodb_change_buffering;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffering';
select * from information_schema.session_variables where variable_name='innodb_change_buffering';
--enable_warnings
--error ER_GLOBAL_VARIABLE
set session innodb_change_buffering='some';
--error ER_GLOBAL_VARIABLE
set @@session.innodb_change_buffering='some';
#
# incorrect types
#
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffering=1.1;
set global innodb_change_buffering=1;
SELECT @@global.innodb_change_buffering;
--error ER_WRONG_VALUE_FOR_VAR
set global innodb_change_buffering=-2;
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffering=1e1;
--error ER_WRONG_VALUE_FOR_VAR
set global innodb_change_buffering='some';
#
# Cleanup
#
SET @@global.innodb_change_buffering = @start_global_value;
SELECT @@global.innodb_change_buffering;

View File

@@ -1,59 +0,0 @@
--source include/have_innodb.inc
--source include/have_debug.inc
SET @start_global_value = @@global.innodb_change_buffering_debug;
SELECT @start_global_value;
#
# exists as global only
#
select @@global.innodb_change_buffering_debug in (0, 1);
select @@global.innodb_change_buffering_debug;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_change_buffering_debug;
show global variables like 'innodb_change_buffering_debug';
show session variables like 'innodb_change_buffering_debug';
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug';
select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug';
--enable_warnings
#
# show that it's writable
#
set global innodb_change_buffering_debug=1;
select @@global.innodb_change_buffering_debug;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug';
select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug';
--enable_warnings
set @@global.innodb_change_buffering_debug=0;
select @@global.innodb_change_buffering_debug;
--disable_warnings
select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug';
select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug';
--enable_warnings
--error ER_GLOBAL_VARIABLE
set session innodb_change_buffering_debug='some';
--error ER_GLOBAL_VARIABLE
set @@session.innodb_change_buffering_debug='some';
#
# incorrect types
#
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffering_debug=1.1;
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffering_debug='foo';
set global innodb_change_buffering_debug=-2;
--error ER_WRONG_TYPE_FOR_VAR
set global innodb_change_buffering_debug=1e1;
set global innodb_change_buffering_debug=2;
select @@global.innodb_change_buffering_debug;
#
# Cleanup
#
SET @@global.innodb_change_buffering_debug = @start_global_value;
SELECT @@global.innodb_change_buffering_debug;

View File

@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2015, Oracle and/or its affiliates.
Copyright (c) 2008, 2022, MariaDB
Copyright (c) 2008, 2023, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -5324,6 +5324,10 @@ static int init_server_components()
MARIADB_REMOVED_OPTION("innodb-thread-concurrency"),
MARIADB_REMOVED_OPTION("innodb-thread-sleep-delay"),
MARIADB_REMOVED_OPTION("innodb-undo-logs"),
/* The following options were deprecated in 10.9 */
MARIADB_REMOVED_OPTION("innodb-change-buffering"),
{0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
/*

View File

@@ -1,6 +1,6 @@
# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2014, 2022, MariaDB Corporation.
# Copyright (c) 2014, 2023, MariaDB Corporation.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -270,7 +270,6 @@ SET(INNOBASE_SOURCES
include/handler0alter.h
include/hash0hash.h
include/ibuf0ibuf.h
include/ibuf0ibuf.inl
include/lock0iter.h
include/lock0lock.h
include/lock0lock.inl

View File

@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2022, MariaDB Corporation.
Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,7 +37,6 @@ Created 6/2/1994 Heikki Tuuri
#include "btr0defragment.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#include "trx0trx.h"
#include "srv0mon.h"
#include "gis0geo.h"
@@ -181,9 +180,8 @@ we allocate pages for the non-leaf levels of the tree.
@param block B-tree root page
@param space tablespace
@return whether the segment header is valid */
static bool btr_root_fseg_validate(ulint offset,
const buf_block_t &block,
const fil_space_t &space)
bool btr_root_fseg_validate(ulint offset, const buf_block_t &block,
const fil_space_t &space)
{
ut_ad(block.page.id().space() == space.id);
const uint16_t hdr= mach_read_from_2(offset + FSEG_HDR_OFFSET +
@@ -213,12 +211,11 @@ ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index)
@param[in] index index tree
@param[in] page page number
@param[in] mode latch mode
@param[in] merge whether change buffer merge should be attempted
@param[in,out] mtr mini-transaction
@param[out] err error code
@return block */
buf_block_t *btr_block_get(const dict_index_t &index,
uint32_t page, ulint mode, bool merge,
uint32_t page, ulint mode,
mtr_t *mtr, dberr_t *err)
{
dberr_t local_err;
@@ -227,7 +224,7 @@ buf_block_t *btr_block_get(const dict_index_t &index,
buf_block_t *block=
buf_page_get_gen(page_id_t{index.table->space->id, page},
index.table->space->zip_size(), mode, nullptr, BUF_GET,
mtr, err, merge && !index.is_clust());
mtr, err);
ut_ad(!block == (*err != DB_SUCCESS));
if (UNIV_LIKELY(block != nullptr))
@@ -266,15 +263,13 @@ btr_root_block_get(
return nullptr;
}
buf_block_t *block = btr_block_get(*index, index->page, mode, false, mtr,
err);
buf_block_t *block = btr_block_get(*index, index->page, mode, mtr, err);
if (block)
{
if (index->is_ibuf());
else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*block, *index->table->space) ||
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
*block, *index->table->space))
if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*block, *index->table->space) ||
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
*block, *index->table->space))
{
*err= DB_CORRUPTION;
block= nullptr;
@@ -460,46 +455,13 @@ btr_page_create(
}
}
/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page from
the free list of the tree, which must contain pages!
The root page is acquired with RW_SX_LATCH, the page number stored at the
FLST_FIRST address of the root's PAGE_BTR_IBUF_FREE_LIST is read, that page
is fetched with RW_X_LATCH, and it is then unlinked from the free list.
NOTE(review): the block is returned even if flst_remove() reports an error,
so the caller has to check *err in addition to the return value.
@return new allocated block, x-latched
@retval nullptr if the root page or the free-list page could not be acquired */
static
buf_block_t*
btr_page_alloc_for_ibuf(
/*====================*/
dict_index_t* index, /*!< in: index tree */
mtr_t* mtr, /*!< in: mini-transaction that acquires the page latches */
dberr_t* err) /*!< out: error code */
{
/* The free list base node is read from the root page. */
buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err);
if (UNIV_UNLIKELY(!root))
/* root is a null pointer here; err was passed on to
btr_root_block_get(), which presumably reported the reason. */
return root;
/* Fetch the page whose number is stored as the first node address
of the free list in the root page. */
buf_block_t *new_block=
buf_page_get_gen(page_id_t(index->table->space_id,
mach_read_from_4(PAGE_HEADER +
PAGE_BTR_IBUF_FREE_LIST +
FLST_FIRST + FIL_ADDR_PAGE +
root->page.frame)),
index->table->space->zip_size(), RW_X_LATCH, nullptr,
BUF_GET, mtr, err);
/* Unlink the page from the free list; the outcome replaces *err. */
if (new_block)
*err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block,
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
/* Validate the list only when no error was reported
(ut_d() is presumably active in debug builds only). */
ut_d(if (*err == DB_SUCCESS)
flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
return new_block;
}
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
@retval NULL if no page could be allocated */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
MY_ATTRIBUTE((nonnull, warn_unused_result))
buf_block_t*
btr_page_alloc_low(
/*===============*/
btr_page_alloc(
dict_index_t* index, /*!< in: index */
uint32_t hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
@@ -513,6 +475,8 @@ btr_page_alloc_low(
page should be initialized. */
dberr_t* err) /*!< out: error code */
{
ut_ad(level < BTR_MAX_NODE_LEVEL);
const auto savepoint= mtr->get_savepoint();
buf_block_t *root= btr_root_block_get(index, RW_NO_LATCH, mtr, err);
if (UNIV_UNLIKELY(!root))
@@ -540,57 +504,6 @@ btr_page_alloc_low(
true, mtr, init_mtr, err);
}
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
This is a dispatcher: for an ibuf index the page is taken by
btr_page_alloc_for_ibuf(), which receives only index, mtr and err (the
hint, direction, level and init_mtr arguments are not forwarded);
for any other index all arguments are forwarded to btr_page_alloc_low().
@retval NULL if no page could be allocated */
buf_block_t*
btr_page_alloc(
/*===========*/
dict_index_t* index, /*!< in: index */
uint32_t hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree; asserted to be less than
BTR_MAX_NODE_LEVEL */
mtr_t* mtr, /*!< in/out: mini-transaction
for the allocation */
mtr_t* init_mtr, /*!< in/out: mini-transaction
for x-latching and initializing
the page */
dberr_t* err) /*!< out: error code */
{
ut_ad(level < BTR_MAX_NODE_LEVEL);
/* The ibuf tree allocates from its own free list; everything else
goes through the general allocation path. */
return index->is_ibuf()
? btr_page_alloc_for_ibuf(index, mtr, err)
: btr_page_alloc_low(index, hint_page_no, file_direction, level,
mtr, init_mtr, err);
}
/**************************************************************//**
Frees a page used in an ibuf tree. Puts the page to the free list of the
ibuf tree, as the first node of the root's PAGE_BTR_IBUF_FREE_LIST.
@return the result of flst_add_first(), or the error code reported by
btr_root_block_get() if the root page could not be acquired */
static
dberr_t
btr_page_free_for_ibuf(
/*===================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
{
/* The caller must already hold the block in this mini-transaction
with the MTR_MEMO_PAGE_X_FIX flag. */
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
/* NOTE(review): err has no initializer; it is assumed that
btr_root_block_get() always assigns it when it returns nullptr. */
dberr_t err;
if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err))
{
/* Link the block in front of the free list kept in the root page. */
err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
/* Validate the list only when no error was reported
(ut_d() is presumably active in debug builds only). */
ut_d(if (err == DB_SUCCESS)
flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
}
return err;
}
/** Free an index page.
@param[in,out] index index tree
@param[in,out] block block to be freed
@@ -623,9 +536,6 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
The page will be freed, so previous changes to it by this
mini-transaction should not matter. */
if (index->is_ibuf())
return btr_page_free_for_ibuf(index, block, mtr);
fil_space_t *space= index->table->space;
dberr_t err;
@@ -708,8 +618,7 @@ btr_node_ptr_get_child(
return btr_block_get(
*index, btr_node_ptr_get_child_page_no(node_ptr, offsets),
RW_SX_LATCH, btr_page_get_level(page_align(node_ptr)) == 1,
mtr, err);
RW_SX_LATCH, mtr, err);
}
MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result))
@@ -930,77 +839,32 @@ btr_create(
mtr_t* mtr,
dberr_t* err)
{
buf_block_t* block;
ut_ad(mtr->is_named_space(space));
ut_ad(index_id != BTR_FREED_INDEX_ID);
ut_ad(index || space == fil_system.sys_space);
/* Create the two new segments (one, in the case of an ibuf tree) for
the index tree; the segment headers are put on the allocated root page
(for an ibuf tree, not in the root, but on a separate ibuf header
page) */
/* Create the two new segments for the index tree;
the segment headers are put on the allocated root page */
if (UNIV_UNLIKELY(type & DICT_IBUF)) {
/* Allocate first the ibuf header page */
buf_block_t* ibuf_hdr_block = fseg_create(
space, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr, err);
buf_block_t *block = fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_TOP,
mtr, err);
if (ibuf_hdr_block == NULL) {
return(FIL_NULL);
}
if (!block) {
return FIL_NULL;
}
ut_ad(ibuf_hdr_block->page.id().page_no()
== IBUF_HEADER_PAGE_NO);
/* Allocate then the next page to the segment: it will be the
tree root page */
block = fseg_alloc_free_page_general(
buf_block_get_frame(ibuf_hdr_block)
+ IBUF_HEADER + IBUF_TREE_SEG_HEADER,
IBUF_TREE_ROOT_PAGE_NO,
FSP_UP, false, mtr, mtr, err);
if (block == NULL) {
return(FIL_NULL);
}
ut_ad(block->page.id() == page_id_t(0,IBUF_TREE_ROOT_PAGE_NO));
flst_init(block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
} else {
block = fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_TOP,
mtr, err);
if (block == NULL) {
return(FIL_NULL);
}
if (!fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr,
err, false, block)) {
/* Not enough space for new segment, free root
segment before return. */
btr_free_root(block, *space, mtr);
return(FIL_NULL);
}
if (!fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr,
err, false, block)) {
/* Not enough space for new segment, free root
segment before return. */
btr_free_root(block, *space, mtr);
return FIL_NULL;
}
ut_ad(!page_has_siblings(block->page.frame));
btr_root_page_init(block, index_id, index, mtr);
/* We reset the free bits for the page in a separate
mini-transaction to allow creation of several trees in the
same mtr, otherwise the latch on a bitmap page would prevent
it because of the latching order.
Note: Insert Buffering is disabled for temporary tables given that
most temporary tables are smaller in size and short-lived. */
if (!(type & DICT_CLUSTERED)
&& (!index || !index->table->is_temporary())) {
ibuf_reset_free_bits(block);
}
/* In the following assertion we test that two records of maximum
allowed size fit on the root page: this fact is needed to ensure
correctness of split algorithms */
@@ -1152,7 +1016,7 @@ void btr_drop_temporary_table(const dict_table_t &table)
{
if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0,
RW_X_LATCH, nullptr, BUF_GET, &mtr,
nullptr, false))
nullptr))
{
btr_free_but_not_root(block, MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO);
@@ -1323,18 +1187,18 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, mtr_t *mtr)
if (page_get_max_trx_id(block->page.frame))
/* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
clustered index root pages. */
ut_ad(dict_index_is_sec_or_ibuf(cursor->index)
ut_ad(!cursor->index->is_primary()
? page_is_leaf(block->page.frame)
: block->page.id().page_no() == cursor->index->page);
else
/* PAGE_MAX_TRX_ID is unused in clustered index pages (other than
the root where it is repurposed as PAGE_ROOT_AUTO_INC), non-leaf
pages, and in temporary tables. It was always zero-initialized in
page_create(). PAGE_MAX_TRX_ID must be nonzero on
dict_index_is_sec_or_ibuf() leaf pages. */
page_create(). PAGE_MAX_TRX_ID must be nonzero on secondary index
leaf pages. */
ut_ad(cursor->index->table->is_temporary() ||
!page_is_leaf(block->page.frame) ||
!dict_index_is_sec_or_ibuf(cursor->index));
cursor->index->is_primary());
#endif
const uint16_t data_size1= page_get_data_size(old->page.frame);
@@ -1534,15 +1398,7 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, mtr_t *mtr)
return DB_SUCCESS;
}
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index. This has to
be done either within the same mini-transaction, or by invoking
ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
IBUF_BITMAP_FREE is unaffected by reorganization.
/** Reorganize an index page.
@return error code
@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */
dberr_t
@@ -1561,15 +1417,7 @@ btr_page_reorganize_block(
return btr_page_reorganize_low(&cur, mtr);
}
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index. This has to
be done either within the same mini-transaction, or by invoking
ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
IBUF_BITMAP_FREE is unaffected by reorganization.
/** Reorganize an index page.
@param cursor page cursor
@param mtr mini-transaction
@return error code
@@ -1800,6 +1648,7 @@ btr_root_raise_and_insert(
ut_ad(!page_is_empty(root->page.frame));
index = btr_cur_get_index(cursor);
ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
ut_ad(!index->is_spatial());
#ifdef UNIV_ZIP_DEBUG
ut_a(!root_page_zip
|| page_zip_validate(root_page_zip, root->page.frame, index));
@@ -1815,12 +1664,11 @@ btr_root_raise_and_insert(
return nullptr;
}
if (index->is_ibuf()) {
} else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*root, *index->table->space)
|| !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
*root, *index->table->space)) {
return nullptr;
if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*root, *index->table->space)
|| !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
*root, *index->table->space)) {
return nullptr;
}
/* Allocate a new page to the tree. Root splitting is done by first
@@ -1891,18 +1739,12 @@ btr_root_raise_and_insert(
page_get_infimum_rec(root->page.frame));
}
/* Move any existing predicate locks */
if (dict_index_is_spatial(index)) {
lock_prdt_rec_move(new_block, root_id);
} else {
btr_search_move_or_delete_hash_entries(
new_block, root);
}
btr_search_move_or_delete_hash_entries(new_block, root);
}
constexpr uint16_t max_trx_id = PAGE_HEADER + PAGE_MAX_TRX_ID;
if (dict_index_is_sec_or_ibuf(index)) {
/* In secondary indexes and the change buffer,
if (!index->is_primary()) {
/* In secondary indexes,
PAGE_MAX_TRX_ID can be reset on the root page, because
the field only matters on leaf pages, and the root no
longer is a leaf page. (Older versions of InnoDB did
@@ -1952,16 +1794,8 @@ btr_root_raise_and_insert(
/* Build the node pointer (= node key and page address) for the
child */
if (dict_index_is_spatial(index)) {
rtr_mbr_t new_mbr;
rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
node_ptr = rtr_index_build_node_ptr(
index, &new_mbr, rec, new_page_no, *heap);
} else {
node_ptr = dict_index_build_node_ptr(
index, rec, new_page_no, *heap, level);
}
node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, *heap,
level);
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -1994,13 +1828,6 @@ btr_root_raise_and_insert(
to new_block at this point. Thus, the data should fit. */
ut_a(node_ptr_rec);
/* We play safe and reset the free bits for the new page */
if (!dict_index_is_clust(index)
&& !index->table->is_temporary()) {
ibuf_reset_free_bits(new_block);
}
page_cursor->block = new_block;
page_cursor->index = index;
@@ -2479,11 +2306,11 @@ btr_attach_half_pages(
/* for consistency, both blocks should be locked, before change */
if (prev_page_no != FIL_NULL && direction == FSP_DOWN) {
prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH,
!level, mtr);
mtr);
}
if (next_page_no != FIL_NULL && direction != FSP_DOWN) {
next_block = btr_block_get(*index, next_page_no, RW_X_LATCH,
!level, mtr);
mtr);
}
/* Build the node pointer (= node key and page address) for the upper
@@ -2629,10 +2456,9 @@ btr_insert_into_right_sibling(
page_t* next_page;
btr_cur_t next_father_cursor;
rec_t* rec = nullptr;
ulint max_size;
next_block = btr_block_get(*cursor->index(), next_page_no, RW_X_LATCH,
page_is_leaf(page), mtr);
mtr);
if (UNIV_UNLIKELY(!next_block)) {
return nullptr;
}
@@ -2655,8 +2481,6 @@ btr_insert_into_right_sibling(
return nullptr;
}
max_size = page_get_max_insert_size_after_reorganize(next_page, 1);
/* Extends gap lock for the next page */
if (is_leaf && cursor->index()->has_locking()) {
lock_update_node_pointer(block, next_block);
@@ -2666,15 +2490,6 @@ btr_insert_into_right_sibling(
n_ext, mtr);
if (!rec) {
if (is_leaf
&& next_block->page.zip.ssize
&& !dict_index_is_clust(cursor->index())
&& !cursor->index()->table->is_temporary()) {
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
reorganize before failing. */
ibuf_reset_free_bits(next_block);
}
return nullptr;
}
@@ -2712,34 +2527,12 @@ btr_insert_into_right_sibling(
}
ut_ad(rec_offs_validate(rec, cursor->index(), *offsets));
if (is_leaf
&& !dict_index_is_clust(cursor->index())
&& !cursor->index()->table->is_temporary()) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
if (next_block->page.zip.ssize) {
ibuf_update_free_bits_zip(next_block, mtr);
} else {
ibuf_update_free_bits_if_full(
next_block, max_size,
rec_offs_size(*offsets) + PAGE_DIR_SLOT_SIZE);
}
}
return(rec);
}
/*************************************************************//**
Moves record list end to another page. Moved records include
split_rec.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return error code */
static
dberr_t
@@ -2795,12 +2588,6 @@ page_move_rec_list_end(
/*************************************************************//**
Moves record list start to another page. Moved records do not include
split_rec.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return error code */
static
dberr_t
@@ -2858,12 +2645,7 @@ btr_page_split_and_insert(
ut_ad(*err == DB_SUCCESS);
ut_ad(dtuple_check_typed(tuple));
if (cursor->index()->is_spatial()) {
/* Split rtree page and update parent */
return rtr_page_split_and_insert(flags, cursor, offsets, heap,
tuple, n_ext, mtr, err);
}
ut_ad(!cursor->index()->is_spatial());
if (!*heap) {
*heap = mem_heap_create(1024);
@@ -3228,13 +3010,6 @@ insert_empty:
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
insert_failed:
/* We play safe and reset the free bits for new_page */
if (!dict_index_is_clust(page_cursor->index)
&& !page_cursor->index->table->is_temporary()) {
ibuf_reset_free_bits(new_block);
ibuf_reset_free_bits(block);
}
n_iterations++;
ut_ad(n_iterations < 2
|| buf_block_get_page_zip(insert_block));
@@ -3244,17 +3019,6 @@ insert_failed:
}
func_exit:
/* Insert fit on the page: update the free bits for the
left and right pages in the same mtr */
if (!dict_index_is_clust(page_cursor->index)
&& !page_cursor->index->table->is_temporary()
&& page_is_leaf(page)) {
ibuf_update_free_bits_for_two_pages_low(
left_block, right_block, mtr);
}
MONITOR_INC(MONITOR_INDEX_SPLIT);
ut_ad(page_validate(buf_block_get_frame(left_block),
@@ -3287,8 +3051,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
if (prev_page_no != FIL_NULL) {
buf_block_t* prev_block = btr_block_get(
index, prev_page_no, RW_X_LATCH, page_is_leaf(page),
mtr, &err);
index, prev_page_no, RW_X_LATCH, mtr, &err);
if (UNIV_UNLIKELY(!prev_block)) {
return err;
}
@@ -3303,8 +3066,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
if (next_page_no != FIL_NULL) {
buf_block_t* next_block = btr_block_get(
index, next_page_no, RW_X_LATCH, page_is_leaf(page),
mtr, &err);
index, next_page_no, RW_X_LATCH, mtr, &err);
if (UNIV_UNLIKELY(!next_block)) {
return err;
@@ -3332,6 +3094,7 @@ btr_lift_page_up(
must not be empty: use
btr_discard_only_page_on_level if the last
record from the page should be removed */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
dberr_t* err) /*!< out: error code */
{
@@ -3366,7 +3129,8 @@ btr_lift_page_up(
if (index->is_spatial()) {
offsets = rtr_page_get_father_block(
nullptr, heap, mtr, nullptr, &cursor);
nullptr, heap, nullptr, &cursor,
thr, mtr);
} else {
offsets = btr_page_get_father_block(offsets, heap,
mtr, &cursor);
@@ -3387,7 +3151,8 @@ btr_lift_page_up(
if (index->is_spatial()) {
offsets = rtr_page_get_father_block(
nullptr, heap, mtr, nullptr, &cursor);
nullptr, heap, nullptr, &cursor, thr,
mtr);
} else {
offsets = btr_page_get_father_block(offsets,
heap,
@@ -3518,13 +3283,8 @@ copied:
/* Free the file page */
btr_page_free(index, block, mtr);
/* We play it safe and reset the free bits for the father */
if (!dict_index_is_clust(index)
&& !index->table->is_temporary()) {
ibuf_reset_free_bits(father_block);
}
ut_ad(page_validate(father_block->page.frame, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
ut_ad(btr_check_node_ptr(index, father_block, thr, mtr));
return(lift_father_up ? block_orig : father_block);
}
@@ -3591,8 +3351,10 @@ btr_compress(
father_cursor.page_cur.block = block;
if (index->is_spatial()) {
ut_ad(cursor->rtr_info);
offsets = rtr_page_get_father_block(
NULL, heap, mtr, cursor, &father_cursor);
nullptr, heap, cursor, &father_cursor,
cursor->rtr_info->thr, mtr);
ut_ad(cursor->page_cur.block->page.id() == block->page.id());
rec_t* my_rec = father_cursor.page_cur.rec;
@@ -3602,10 +3364,10 @@ btr_compress(
ib::info() << "father positioned on page "
<< page_no << "instead of "
<< block->page.id().page_no();
offsets = btr_page_get_father_block(
NULL, heap, mtr, &father_cursor);
goto get_offsets;
}
} else {
get_offsets:
offsets = btr_page_get_father_block(
NULL, heap, mtr, &father_cursor);
}
@@ -3615,14 +3377,7 @@ btr_compress(
if (UNIV_UNLIKELY(!nth_rec || nth_rec == ULINT_UNDEFINED)) {
corrupted:
err = DB_CORRUPTION;
err_exit:
/* We play it safe and reset the free bits. */
if (merge_block && merge_block->zip_size()
&& page_is_leaf(merge_block->page.frame)
&& !index->is_clust()) {
ibuf_reset_free_bits(merge_block);
}
goto func_exit;
goto err_exit;
}
}
@@ -3630,7 +3385,10 @@ btr_compress(
/* The page is the only one on the level, lift the records
to the father */
merge_block = btr_lift_page_up(index, block, mtr, &err);
merge_block = btr_lift_page_up(index, block,
cursor->rtr_info
? cursor->rtr_info->thr
: nullptr, mtr, &err);
success:
if (adjust) {
ut_ad(nth_rec > 0);
@@ -3645,7 +3403,7 @@ success:
}
MONITOR_INC(MONITOR_INDEX_MERGE_SUCCESSFUL);
func_exit:
err_exit:
mem_heap_free(heap);
DBUG_RETURN(err);
}
@@ -3945,49 +3703,6 @@ cannot_merge:
}
}
if (!dict_index_is_clust(index)
&& !index->table->is_temporary()
&& page_is_leaf(merge_page)) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. This has to be done in a
separate mini-transaction that is committed before the
main mini-transaction. We cannot update the insert
buffer bitmap in this mini-transaction, because
btr_compress() can be invoked recursively without
committing the mini-transaction in between. Since
insert buffer bitmap pages have a lower rank than
B-tree pages, we must not access other pages in the
same mini-transaction after accessing an insert buffer
bitmap page. */
/* The free bits in the insert buffer bitmap must
never exceed the free space on a page. It is safe to
decrement or reset the bits in the bitmap in a
mini-transaction that is committed before the
mini-transaction that affects the free space. */
/* It is unsafe to increment the bits in a separately
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
if (merge_block->zip_size()) {
/* Because the free bits may be incremented
and we cannot update the insert buffer bitmap
in the same mini-transaction, the only safe
thing we can do here is the pessimistic
approach: reset the free bits. */
ibuf_reset_free_bits(merge_block);
} else {
/* On uncompressed pages, the free bits will
never increase here. Thus, it is safe to
write the bits accurately in a separate
mini-transaction. */
ibuf_update_free_bits_if_full(merge_block,
srv_page_size,
ULINT_UNDEFINED);
}
}
ut_ad(page_validate(merge_page, index));
#ifdef UNIV_ZIP_DEBUG
ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page,
@@ -4002,7 +3717,10 @@ cannot_merge:
err = btr_page_free(index, block, mtr);
if (err == DB_SUCCESS) {
ut_ad(leftmost_child
|| btr_check_node_ptr(index, merge_block, mtr));
|| btr_check_node_ptr(index, merge_block,
cursor->rtr_info
? cursor->rtr_info->thr
: nullptr, mtr));
goto success;
} else {
goto err_exit;
@@ -4019,11 +3737,13 @@ static
void
btr_discard_only_page_on_level(
/*===========================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page which is the only on its level */
btr_cur_t* cur, /*!< in: cursor on a page which is the
only on its level */
mtr_t* mtr) /*!< in: mtr */
{
ulint page_level = 0;
dict_index_t* index = cur->index();
buf_block_t* block = btr_cur_get_block(cur);
ulint page_level = 0;
ut_ad(!index->is_dummy);
@@ -4054,7 +3774,8 @@ btr_discard_only_page_on_level(
if (index->is_spatial()) {
/* Check any concurrent search having this page */
rtr_check_discard_page(index, NULL, block);
if (!rtr_page_get_father(mtr, nullptr, &cursor)) {
if (!rtr_page_get_father(mtr, nullptr, &cursor,
cur->rtr_info->thr)) {
return;
}
} else {
@@ -4120,9 +3841,6 @@ btr_discard_only_page_on_level(
index->clear_instant_add();
}
} else if (!index->table->is_temporary()) {
/* We play it safe and reset the free bits for the root */
ibuf_reset_free_bits(block);
ut_a(max_trx_id);
page_set_max_trx_id(block,
buf_block_get_page_zip(block),
@@ -4159,7 +3877,8 @@ btr_discard_page(
MONITOR_INC(MONITOR_INDEX_DISCARD);
if (index->is_spatial()
? !rtr_page_get_father(mtr, cursor, &parent_cursor)
? !rtr_page_get_father(mtr, cursor, &parent_cursor,
cursor->rtr_info->thr)
: !btr_page_get_father(mtr, &parent_cursor)) {
return DB_CORRUPTION;
}
@@ -4173,7 +3892,7 @@ btr_discard_page(
if (left_page_no != FIL_NULL) {
dberr_t err;
merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH,
true, mtr, &err);
mtr, &err);
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
@@ -4194,7 +3913,7 @@ btr_discard_page(
} else if (right_page_no != FIL_NULL) {
dberr_t err;
merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH,
true, mtr, &err);
mtr, &err);
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
@@ -4220,7 +3939,7 @@ btr_discard_page(
return DB_CORRUPTION;
}
} else {
btr_discard_only_page_on_level(index, block, mtr);
btr_discard_only_page_on_level(cursor, mtr);
return DB_SUCCESS;
}
@@ -4278,14 +3997,20 @@ btr_discard_page(
If the merge_block's parent block is not same,
we cannot use btr_check_node_ptr() */
ut_ad(parent_is_different
|| btr_check_node_ptr(index, merge_block, mtr));
|| btr_check_node_ptr(index, merge_block,
cursor->rtr_info
? cursor->rtr_info->thr
: nullptr, mtr));
if (btr_cur_get_block(&parent_cursor)->page.id().page_no()
== index->page
&& !page_has_siblings(btr_cur_get_page(&parent_cursor))
&& page_get_n_recs(btr_cur_get_page(&parent_cursor))
== 1) {
btr_lift_page_up(index, merge_block, mtr, &err);
btr_lift_page_up(index, merge_block,
cursor->rtr_info
? cursor->rtr_info->thr
: nullptr, mtr, &err);
}
}
@@ -4304,13 +4029,6 @@ btr_print_size(
fseg_header_t* seg;
mtr_t mtr;
if (dict_index_is_ibuf(index)) {
fputs("Sorry, cannot print info of an ibuf tree:"
" use ibuf functions\n", stderr);
return;
}
mtr_start(&mtr);
root = btr_root_get(index, &mtr);
@@ -4320,13 +4038,10 @@ btr_print_size(
fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
fseg_print(seg, &mtr);
if (!dict_index_is_ibuf(index)) {
seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
fseg_print(seg, &mtr);
}
fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
fseg_print(seg, &mtr);
mtr_commit(&mtr);
}
@@ -4437,6 +4152,7 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
{
mem_heap_t* heap;
@@ -4458,8 +4174,8 @@ btr_check_node_ptr(
heap = mem_heap_create(256);
if (dict_index_is_spatial(index)) {
offsets = rtr_page_get_father_block(NULL, heap, mtr,
NULL, &cursor);
offsets = rtr_page_get_father_block(NULL, heap,
NULL, &cursor, thr, mtr);
} else {
offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor);
}
@@ -4532,14 +4248,6 @@ btr_index_rec_validate(
ut_ad(index->n_core_fields);
if (index->is_ibuf()) {
/* The insert buffer index tree can contain records from any
other index: we cannot check the number of fields or
their length */
return(TRUE);
}
#ifdef VIRTUAL_INDEX_DEBUG
if (dict_index_has_virtual(index)) {
fprintf(stderr, "index name is %s\n", index->name());
@@ -4882,8 +4590,7 @@ corrupted:
savepoint2 = mtr_set_savepoint(&mtr);
block = btr_block_get(*index, left_page_no,
RW_SX_LATCH, false,
&mtr, &err);
RW_SX_LATCH, &mtr, &err);
if (!block) {
goto invalid_page;
}
@@ -4961,7 +4668,7 @@ func_exit:
savepoint = mtr_set_savepoint(&mtr);
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
&mtr, &err);
if (!right_block) {
btr_validate_report1(index, level, block);
fputs("InnoDB: broken FIL_PAGE_NEXT link\n", stderr);
@@ -5158,15 +4865,13 @@ broken_links:
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
parent_right_page_no,
RW_SX_LATCH, false,
&mtr);
RW_SX_LATCH, &mtr);
}
right_block = btr_block_get(*index,
right_page_no,
RW_SX_LATCH,
!level, &mtr,
&err);
&mtr, &err);
if (!right_block) {
btr_validate_report1(index, level,
block);
@@ -5252,17 +4957,16 @@ node_ptr_fails:
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
parent_right_page_no,
RW_SX_LATCH, false,
&mtr);
RW_SX_LATCH, &mtr);
}
} else if (parent_page_no != FIL_NULL) {
btr_block_get(*index, parent_page_no,
RW_SX_LATCH, false, &mtr);
RW_SX_LATCH, &mtr);
}
}
block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
&mtr, &err);
goto loop;
}
@@ -5334,8 +5038,7 @@ error:
index = btr_cur_get_index(cursor);
page = btr_cur_get_page(cursor);
mblock = btr_block_get(*index, page_no, RW_X_LATCH, page_is_leaf(page),
mtr);
mblock = btr_block_get(*index, page_no, RW_X_LATCH, mtr);
if (!mblock) {
goto error;
}

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +28,6 @@ Created 03/11/2014 Shaohua Wang
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0pcur.h"
#include "ibuf0ibuf.h"
#include "page0page.h"
#include "trx0trx.h"
@@ -107,7 +106,7 @@ oom:
}
} else {
new_block = btr_block_get(*m_index, m_page_no, RW_X_LATCH,
false, &m_mtr);
&m_mtr);
if (!new_block) {
m_mtr.commit();
return(DB_CORRUPTION);
@@ -122,7 +121,7 @@ oom:
m_page_zip = buf_block_get_page_zip(new_block);
if (!m_level && dict_index_is_sec_or_ibuf(m_index)) {
if (!m_level && !m_index->is_primary()) {
page_update_max_trx_id(new_block, m_page_zip, m_trx_id,
&m_mtr);
}
@@ -563,9 +562,6 @@ inline void PageBulk::finish()
void PageBulk::commit(bool success)
{
finish();
if (success && !m_index->is_clust() && page_is_leaf(m_page))
ibuf_set_bitmap_for_bulk_load(m_block, &m_mtr,
innobase_fill_factor == 100);
m_mtr.commit();
}
@@ -1194,7 +1190,7 @@ BtrBulk::finish(dberr_t err)
ut_ad(last_page_no != FIL_NULL);
last_block = btr_block_get(*m_index, last_page_no, RW_X_LATCH,
false, &mtr);
&mtr);
if (!last_block) {
err = DB_CORRUPTION;
err_exit:

View File

@@ -3,7 +3,7 @@
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2015, 2022, MariaDB Corporation.
Copyright (c) 2015, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -61,7 +61,6 @@ Created 10/16/1994 Heikki Tuuri
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "zlib.h"
#include "srv0start.h"
@@ -73,15 +72,6 @@ Created 10/16/1994 Heikki Tuuri
#endif /* WITH_WSREP */
#include "log.h"
/** Buffered B-tree operation types, introduced as part of delete buffering. */
enum btr_op_t {
BTR_NO_OP = 0, /*!< Not buffered */
BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
BTR_DELETE_OP, /*!< Purge a delete-marked record */
BTR_DELMARK_OP /*!< Mark a record for deletion */
};
/** Modification types for the B-tree operation.
Note that the order must be DELETE, BOTH, INSERT !!
*/
@@ -259,7 +249,7 @@ latch_block:
if (left_page_no != FIL_NULL) {
buf_block_t *b = btr_block_get(
*cursor->index(), left_page_no, RW_X_LATCH,
true, mtr);
mtr);
if (latch_leaves) {
latch_leaves->savepoints[0] = save;
@@ -300,7 +290,7 @@ latch_block:
buf_block_t* b = btr_block_get(
*cursor->index(), right_page_no, RW_X_LATCH,
true, mtr);
mtr);
if (latch_leaves) {
latch_leaves->savepoints[2] = save;
latch_leaves->blocks[2] = b;
@@ -331,8 +321,7 @@ latch_block:
if (left_page_no != FIL_NULL) {
save = mtr->get_savepoint();
cursor->left_block = btr_block_get(
*cursor->index(), left_page_no,
mode, true, mtr);
*cursor->index(), left_page_no, mode, mtr);
if (latch_leaves) {
latch_leaves->savepoints[0] = save;
latch_leaves->blocks[0] = cursor->left_block;
@@ -341,7 +330,8 @@ latch_block:
goto latch_block;
case BTR_CONT_MODIFY_TREE:
ut_ad(cursor->index()->is_spatial());
ut_ad(cursor->index()->is_dummy
|| cursor->index()->is_spatial());
return;
}
@@ -758,7 +748,7 @@ btr_cur_optimistic_latch_leaves(
return(buf_page_optimistic_get(*latch_mode, block,
modify_clock, mtr));
case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */
case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */
case BTR_MODIFY_PREV: /* Ditto */
uint32_t curr_page_no, left_page_no;
{
transactional_shared_lock_guard<block_lock> g{
@@ -1063,20 +1053,6 @@ btr_cur_need_opposite_intention(
@return maximum size of a node pointer record in bytes */
static ulint btr_node_ptr_max_size(const dict_index_t* index)
{
if (dict_index_is_ibuf(index)) {
/* cannot estimate accurately */
/* This is universal index for change buffer.
The max size of the entry is about max key length * 2.
(index key + primary key to be inserted to the index)
(The max key length is UNIV_PAGE_SIZE / 16 * 3 at
ha_innobase::max_supported_key_length(),
considering MAX_KEY_LENGTH = 3072 at MySQL imposes
the 3500 historical InnoDB value for 16K page size case.)
For the universal index, node_ptr contains most of the entry.
And 512 is enough to contain ibuf columns and meta-data */
return srv_page_size / 8 * 3 + 512;
}
/* Each record has page_no, length of page_no and header. */
ulint comp = dict_table_is_comp(index->table);
ulint rec_max_size = comp
@@ -1245,10 +1221,8 @@ dberr_t btr_cur_search_to_nth_level(ulint level,
ulint rw_latch;
page_cur_mode_t page_mode;
page_cur_mode_t search_mode = PAGE_CUR_UNSUPP;
ulint buf_mode;
ulint node_ptr_max_size = srv_page_size / 2;
page_cur_t* page_cursor;
btr_op_t btr_op;
ulint root_height = 0; /* remove warning */
btr_intention_t lock_intention;
@@ -1287,7 +1261,6 @@ dberr_t btr_cur_search_to_nth_level(ulint level,
ut_ad(level == 0 || mode == PAGE_CUR_LE
|| RTREE_SEARCH_MODE(mode));
ut_ad(dict_index_check_search_tuple(index, tuple));
ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
ut_ad(dtuple_check_typed(tuple));
ut_ad(!(index->type & DICT_FTS));
ut_ad(index->page != FIL_NULL);
@@ -1308,36 +1281,6 @@ dberr_t btr_cur_search_to_nth_level(ulint level,
|| mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK
| MTR_MEMO_SX_LOCK));
/* These flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
none of the flags to be set. */
switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) {
default:
btr_op = BTR_NO_OP;
break;
case BTR_INSERT:
btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
? BTR_INSERT_IGNORE_UNIQUE_OP
: BTR_INSERT_OP;
break;
case BTR_DELETE:
btr_op = BTR_DELETE_OP;
ut_a(cursor->purge_node);
break;
case BTR_DELETE_MARK:
btr_op = BTR_DELMARK_OP;
break;
}
/* Operations on the insert buffer tree cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
/* Operations on the clustered index cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
/* Operations on the temporary table(indexes) cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary());
/* Operation on the spatial index cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index));
lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
/* Turn the flags unrelated to the latch mode off. */
@@ -1512,7 +1455,6 @@ x_latch_index:
btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
search_loop:
buf_mode = BUF_GET;
rw_latch = RW_NO_LATCH;
rtree_parent_modified = false;
@@ -1534,108 +1476,16 @@ search_loop:
}
} else if (latch_mode <= BTR_MODIFY_LEAF) {
rw_latch = latch_mode;
if (btr_op != BTR_NO_OP
&& ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
/* Try to buffer the operation if the leaf
page is not in the buffer pool. */
buf_mode = btr_op == BTR_DELETE_OP
? BUF_GET_IF_IN_POOL_OR_WATCH
: BUF_GET_IF_IN_POOL;
}
}
retry_page_get:
ut_ad(n_blocks < BTR_MAX_LEVELS);
tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
dberr_t err;
block = buf_page_get_gen(page_id, zip_size, rw_latch, guess,
buf_mode, mtr, &err,
height == 0 && !index->is_clust());
BUF_GET, mtr, &err);
if (!block) {
switch (err) {
case DB_SUCCESS:
/* change buffering */
break;
case DB_DECRYPTION_FAILED:
btr_decryption_failed(*index);
/* fall through */
default:
goto func_exit;
}
/* This must be a search to perform an insert/delete
mark/ delete; try using the insert/delete buffer */
ut_ad(height == 0);
ut_ad(cursor->thr);
switch (btr_op) {
default:
MY_ASSERT_UNREACHABLE();
break;
case BTR_INSERT_OP:
case BTR_INSERT_IGNORE_UNIQUE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
page_id, zip_size, cursor->thr)) {
cursor->flag = BTR_CUR_INSERT_TO_IBUF;
goto func_exit;
}
break;
case BTR_DELMARK_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
index, page_id, zip_size,
cursor->thr)) {
cursor->flag = BTR_CUR_DEL_MARK_IBUF;
goto func_exit;
}
break;
case BTR_DELETE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(index->is_btree());
auto& chain = buf_pool.page_hash.cell_get(
page_id.fold());
if (!row_purge_poss_sec(cursor->purge_node,
index, tuple)) {
/* The record cannot be purged yet. */
cursor->flag = BTR_CUR_DELETE_REF;
} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
index, page_id, zip_size,
cursor->thr)) {
/* The purge was buffered. */
cursor->flag = BTR_CUR_DELETE_IBUF;
} else {
/* The purge could not be buffered. */
buf_pool.watch_unset(page_id, chain);
break;
}
buf_pool.watch_unset(page_id, chain);
goto func_exit;
}
/* Insert to the insert/delete buffer did not succeed, we
must read the page from disk. */
buf_mode = BUF_GET;
goto retry_page_get;
goto func_exit;
}
tree_blocks[n_blocks] = block;
@@ -1658,7 +1508,7 @@ retry_page_get:
= mtr_set_savepoint(mtr);
buf_block_t* get_block = buf_page_get_gen(
page_id_t(page_id.space(), left_page_no),
zip_size, rw_latch, NULL, buf_mode,
zip_size, rw_latch, NULL, BUF_GET,
mtr, &err);
if (!get_block) {
if (err == DB_DECRYPTION_FAILED) {
@@ -1729,7 +1579,6 @@ retry_page_get:
}
/* Save the MBR */
cursor->rtr_info->thr = cursor->thr;
rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr);
}
@@ -1902,15 +1751,16 @@ retry_page_get:
/* Add Predicate lock if it is serializable isolation
and only if it is in the search case */
if (dict_index_is_spatial(index)
if (index->is_spatial()
&& cursor->rtr_info->need_prdt_lock
&& mode >= PAGE_CUR_CONTAIN
&& mode != PAGE_CUR_RTREE_INSERT
&& mode != PAGE_CUR_RTREE_LOCATE
&& mode >= PAGE_CUR_CONTAIN) {
&& mode != PAGE_CUR_RTREE_LOCATE) {
lock_prdt_t prdt;
que_thr_t* thr = cursor->rtr_info->thr;
{
trx_t* trx = thr_get_trx(cursor->thr);
trx_t* trx = thr_get_trx(thr);
TMLockTrxGuard g{TMLockTrxArgs(*trx)};
lock_init_prdt_from_mbr(
&prdt, &cursor->rtr_info->mbr, mode,
@@ -1922,7 +1772,7 @@ retry_page_get:
}
lock_prdt_lock(block, &prdt, index, LOCK_S,
LOCK_PREDICATE, cursor->thr);
LOCK_PREDICATE, thr);
if (rw_latch == RW_NO_LATCH && height != 0) {
block->page.lock.s_unlock();
@@ -2230,17 +2080,6 @@ need_opposite_intention:
n_blocks++;
if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
/* We're doing a search on an ibuf tree and we're one
level above the leaf page. */
ut_ad(level == 0);
buf_mode = BUF_GET;
rw_latch = RW_NO_LATCH;
goto retry_page_get;
}
if (dict_index_is_spatial(index)
&& page_mode >= PAGE_CUR_CONTAIN
&& page_mode != PAGE_CUR_RTREE_INSERT) {
@@ -2311,7 +2150,7 @@ need_opposite_intention:
btr_block_get(
*index, page_id.page_no(),
latch_mode == BTR_CONT_MODIFY_TREE
? RW_X_LATCH : RW_SX_LATCH, false, mtr, &err);
? RW_X_LATCH : RW_SX_LATCH, mtr, &err);
} else {
ut_ad(mtr->memo_contains_flagged(block,
upper_rw_latch));
@@ -2440,10 +2279,12 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
rw_lock_type_t upper_rw_latch= RW_X_LATCH;
switch (latch_mode) {
case BTR_CONT_MODIFY_TREE:
case BTR_CONT_SEARCH_TREE:
abort();
break;
ut_ad("invalid mode" == 0);
/* fall through */
case BTR_CONT_MODIFY_TREE:
ut_ad(index->is_dummy);
/* fall through */
case BTR_MODIFY_TREE:
/* Most of delete-intended operations are purging. Free blocks
and read IO bandwidth should be prioritized for them, when the
@@ -2489,9 +2330,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
const rw_lock_type_t rw_latch= height && latch_mode != BTR_MODIFY_TREE
? upper_rw_latch
: RW_NO_LATCH;
buf_block_t* block=
btr_block_get(*index, page, rw_latch, !height && !index->is_clust(), mtr,
&err);
buf_block_t* block= btr_block_get(*index, page, rw_latch, mtr, &err);
ut_ad(!block == (err != DB_SUCCESS));
@@ -2514,7 +2353,9 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
/* We are in the root node */
height= l;
if (height);
else if (upper_rw_latch != root_leaf_rw_latch)
else if (upper_rw_latch != root_leaf_rw_latch &&
/* ibuf_upgrade() needs the following condition */
UNIV_LIKELY(root_leaf_rw_latch != RW_NO_LATCH))
{
/* We should retry to get the page, because the root page
is latched with a different level than a leaf page. */
@@ -2635,11 +2476,6 @@ be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristic is applied to whether it pays to use CPU time for
reorganizing the page or not.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to inserted record if succeed, else NULL */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
rec_t*
@@ -2808,9 +2644,6 @@ static void btr_cur_prefetch_siblings(const buf_block_t *block,
{
ut_ad(page_is_leaf(block->page.frame));
if (index->is_ibuf())
return;
const page_t *page= block->page.frame;
uint32_t prev= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_PREV));
uint32_t next= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT));
@@ -3046,14 +2879,6 @@ fail_err:
if (*rec) {
} else if (block->page.zip.data) {
ut_ad(!index->table->is_temporary());
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
reorganize before failing. */
if (leaf
&& !dict_index_is_clust(index)) {
ibuf_reset_free_bits(block);
}
goto fail;
} else {
ut_ad(!reorg);
@@ -3094,34 +2919,6 @@ fail_err:
lock_update_insert(block, *rec);
}
if (leaf
&& !dict_index_is_clust(index)
&& !index->table->is_temporary()) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
/* The free bits in the insert buffer bitmap must
never exceed the free space on a page. It is safe to
decrement or reset the bits in the bitmap in a
mini-transaction that is committed before the
mini-transaction that affects the free space. */
/* It is unsafe to increment the bits in a separately
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
if (block->page.zip.data) {
/* Update the bits in the same mini-transaction. */
ibuf_update_free_bits_zip(block, mtr);
} else {
/* Decrement the bits in a separate
mini-transaction. */
ibuf_update_free_bits_if_full(
block, max_size,
rec_size + PAGE_DIR_SLOT_SIZE);
}
}
*big_rec = big_rec_vec;
return(DB_SUCCESS);
@@ -3192,12 +2989,10 @@ btr_cur_pessimistic_insert(
the index tree, so that the insert will not fail because of
lack of space */
if (!index->is_ibuf()
&& (err = fsp_reserve_free_extents(&n_reserved, index->table->space,
uint32_t(cursor->tree_height / 16
+ 3),
FSP_NORMAL, mtr))
!= DB_SUCCESS) {
err = fsp_reserve_free_extents(&n_reserved, index->table->space,
uint32_t(cursor->tree_height / 16 + 3),
FSP_NORMAL, mtr);
if (err != DB_SUCCESS) {
return err;
}
@@ -3229,11 +3024,21 @@ btr_cur_pessimistic_insert(
}
}
*rec = index->page == btr_cur_get_block(cursor)->page.id().page_no()
? btr_root_raise_and_insert(flags, cursor, offsets, heap,
entry, n_ext, mtr, &err)
: btr_page_split_and_insert(flags, cursor, offsets, heap,
entry, n_ext, mtr, &err);
if (index->page == btr_cur_get_block(cursor)->page.id().page_no()) {
*rec = index->is_spatial()
? rtr_root_raise_and_insert(flags, cursor, offsets,
heap, entry, n_ext, mtr,
&err, thr)
: btr_root_raise_and_insert(flags, cursor, offsets,
heap, entry, n_ext, mtr,
&err);
} else if (index->is_spatial()) {
*rec = rtr_page_split_and_insert(flags, cursor, offsets, heap,
entry, n_ext, mtr, &err, thr);
} else {
*rec = btr_page_split_and_insert(flags, cursor, offsets, heap,
entry, n_ext, mtr, &err);
}
if (!*rec) {
goto func_exit;
@@ -3477,14 +3282,8 @@ static dberr_t btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec,
See if there is enough place in the page modification log to log
an update-in-place.
@retval false if out of space; IBUF_BITMAP_FREE will be reset
outside mtr if the page was recompressed
@retval true if enough place;
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
a secondary index leaf page. This has to be done either within the
same mini-transaction, or by invoking ibuf_reset_free_bits() before
mtr_commit(mtr). */
@retval false if out of space
@retval true if enough place */
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
@@ -3505,7 +3304,6 @@ btr_cur_update_alloc_zip_func(
const page_t* page = page_cur_get_page(cursor);
ut_ad(page_zip == page_cur_get_page_zip(cursor));
ut_ad(!dict_index_is_ibuf(index));
ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
if (page_zip_available(page_zip, dict_index_is_clust(index),
@@ -3529,26 +3327,8 @@ btr_cur_update_alloc_zip_func(
rec_offs_make_valid(page_cur_get_rec(cursor), index,
page_is_leaf(page), offsets);
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
space on the page. Because this function will not attempt
recompression unless page_zip_available() fails above, it is
safe to reset the free bits if page_zip_available() fails
again, below. The free bits can safely be reset in a separate
mini-transaction. If page_zip_available() succeeds below, we
can be sure that the btr_page_reorganize() above did not reduce
the free space available on the page. */
if (page_zip_available(page_zip, dict_index_is_clust(index),
length, create)) {
return true;
}
}
if (!dict_index_is_clust(index)
&& !index->table->is_temporary()
&& page_is_leaf(page)) {
ibuf_reset_free_bits(page_cur_get_block(cursor));
return page_zip_available(page_zip, dict_index_is_clust(index),
length, create);
}
return(false);
@@ -3697,7 +3477,7 @@ We assume here that the ordering fields of the record do not change.
@return locking or undo log related error code, or
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
on a ROW_FORMAT=COMPRESSED page */
dberr_t
btr_cur_update_in_place(
/*====================*/
@@ -3717,7 +3497,6 @@ btr_cur_update_in_place(
further pages */
{
dict_index_t* index;
dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr = 0;
ulint was_delete_marked;
@@ -3725,17 +3504,14 @@ btr_cur_update_in_place(
ut_ad(page_is_leaf(cursor->page_cur.block->page.frame));
rec = btr_cur_get_rec(cursor);
index = cursor->index();
ut_ad(!index->is_ibuf());
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG)
|| index->table->is_temporary());
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
|| index->is_primary());
ut_ad(thr_get_trx(thr)->id == trx_id
|| (flags & ulint(~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)))
|| (flags & ulint(~BTR_KEEP_POS_FLAG))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_index_page_check(btr_cur_get_page(cursor)));
@@ -3765,22 +3541,17 @@ btr_cur_update_in_place(
}
/* Do lock checking and undo logging */
err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
update, cmpl_info,
thr, mtr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
/* We may need to update the IBUF_BITMAP_FREE
bits after a reorganize that was done in
btr_cur_update_alloc_zip(). */
goto func_exit;
if (dberr_t err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
update, cmpl_info,
thr, mtr, &roll_ptr)) {
return err;
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
err = btr_cur_upd_rec_sys(block, rec, index, offsets,
thr_get_trx(thr), roll_ptr, mtr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto func_exit;
}
if (flags & BTR_KEEP_SYS_FLAG) {
} else if (dberr_t err = btr_cur_upd_rec_sys(block, rec, index, offsets,
thr_get_trx(thr),
roll_ptr, mtr)) {
return err;
}
was_delete_marked = rec_get_deleted_flag(
@@ -3838,19 +3609,7 @@ btr_cur_update_in_place(
btr_cur_unmark_extern_fields(block, rec, index, offsets, mtr);
}
ut_ad(err == DB_SUCCESS);
func_exit:
if (page_zip
&& !(flags & BTR_KEEP_IBUF_BITMAP)
&& !dict_index_is_clust(index)
&& page_is_leaf(buf_block_get_frame(block))) {
/* Update the free bits in the insert buffer. */
ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
}
return(err);
return DB_SUCCESS;
}
/** Trim a metadata record during the rollback of instant ALTER TABLE.
@@ -3994,7 +3753,7 @@ fields of the record do not change.
@retval DB_OVERFLOW if the updated record does not fit
@retval DB_UNDERFLOW if the page would become too empty
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
on a ROW_FORMAT=COMPRESSED page */
dberr_t
btr_cur_optimistic_update(
/*======================*/
@@ -4025,7 +3784,6 @@ btr_cur_optimistic_update(
ulint max_size;
ulint new_rec_size;
ulint old_rec_size;
ulint max_ins_size = 0;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
ulint i;
@@ -4034,19 +3792,16 @@ btr_cur_optimistic_update(
page = buf_block_get_frame(block);
rec = btr_cur_get_rec(cursor);
index = cursor->index();
ut_ad(index->has_locking());
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG)
|| index->table->is_temporary());
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page updates */
ut_ad(page_is_leaf(page));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
ut_ad(thr_get_trx(thr)->id == trx_id
|| (flags & ulint(~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)))
|| (flags & ulint(~BTR_KEEP_POS_FLAG))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_index_page_check(page));
@@ -4075,7 +3830,6 @@ btr_cur_optimistic_update(
if (rec_offs_any_extern(*offsets)) {
any_extern:
ut_ad(!index->is_ibuf());
/* Externally stored fields are treated in pessimistic
update */
@@ -4156,9 +3910,6 @@ any_extern:
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
/* We may need to update the IBUF_BITMAP_FREE
bits after a reorganize that was done in
btr_cur_update_alloc_zip(). */
err = DB_OVERFLOW;
goto func_exit;
}
@@ -4166,10 +3917,6 @@ any_extern:
if (UNIV_UNLIKELY(page_get_data_size(page)
- old_rec_size + new_rec_size
< BTR_CUR_PAGE_COMPRESS_LIMIT(index))) {
/* We may need to update the IBUF_BITMAP_FREE
bits after a reorganize that was done in
btr_cur_update_alloc_zip(). */
/* The page would become too empty */
err = DB_UNDERFLOW;
goto func_exit;
@@ -4182,19 +3929,9 @@ any_extern:
: (old_rec_size
+ page_get_max_insert_size_after_reorganize(page, 1));
if (!page_zip) {
max_ins_size = page_get_max_insert_size_after_reorganize(
page, 1);
}
if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
&& (max_size >= new_rec_size))
|| (page_get_n_recs(page) <= 1))) {
/* We may need to update the IBUF_BITMAP_FREE
bits after a reorganize that was done in
btr_cur_update_alloc_zip(). */
/* There was not enough space, or it did not pay to
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
@@ -4208,9 +3945,6 @@ any_extern:
update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
/* We may need to update the IBUF_BITMAP_FREE
bits after a reorganize that was done in
btr_cur_update_alloc_zip(). */
goto func_exit;
}
@@ -4266,22 +4000,11 @@ any_extern:
ut_ad(err == DB_SUCCESS);
if (!page_cur_move_to_next(page_cursor)) {
corrupted:
err = DB_CORRUPTION;
}
func_exit:
if (!(flags & BTR_KEEP_IBUF_BITMAP)
&& !dict_index_is_clust(index)) {
/* Update the free bits in the insert buffer. */
if (page_zip) {
ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
} else if (!index->table->is_temporary()) {
ibuf_update_free_bits_low(block, max_ins_size, mtr);
}
return DB_CORRUPTION;
}
if (err != DB_SUCCESS) {
func_exit:
/* prefetch siblings of the leaf for the pessimistic
operation. */
btr_cur_prefetch_siblings(block, index);
@@ -4378,7 +4101,6 @@ btr_cur_pessimistic_update(
big_rec_t* dummy_big_rec;
dict_index_t* index;
buf_block_t* block;
page_zip_des_t* page_zip;
rec_t* rec;
page_cur_t* page_cursor;
dberr_t err;
@@ -4391,20 +4113,19 @@ btr_cur_pessimistic_update(
*big_rec = NULL;
block = btr_cur_get_block(cursor);
page_zip = buf_block_get_page_zip(block);
index = cursor->index();
ut_ad(index->has_locking());
ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK |
MTR_MEMO_SX_LOCK));
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
#if defined UNIV_ZIP_DEBUG || defined UNIV_DEBUG
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
#endif
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip
|| page_zip_validate(page_zip, block->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
ut_ad(!page_zip || !index->table->is_temporary());
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG)
|| index->table->is_temporary());
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
@@ -4415,7 +4136,7 @@ btr_cur_pessimistic_update(
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
err = optim_err = btr_cur_optimistic_update(
flags | BTR_KEEP_IBUF_BITMAP,
flags,
cursor, offsets, offsets_heap, update,
cmpl_info, thr, trx_id, mtr);
@@ -4426,18 +4147,6 @@ btr_cur_pessimistic_update(
break;
default:
err_exit:
/* We suppressed this with BTR_KEEP_IBUF_BITMAP.
For DB_ZIP_OVERFLOW, the IBUF_BITMAP_FREE bits were
already reset by btr_cur_update_alloc_zip() if the
page was recompressed. */
if (page_zip
&& optim_err != DB_ZIP_OVERFLOW
&& !dict_index_is_clust(index)
&& page_is_leaf(block->page.frame)) {
ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
}
if (big_rec_vec != NULL) {
dtuple_big_rec_free(big_rec_vec);
}
@@ -4515,11 +4224,6 @@ btr_cur_pessimistic_update(
index->first_user_field())))) {
big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
/* We cannot goto return_after_reservations,
because we may need to update the
IBUF_BITMAP_FREE bits, which was suppressed by
BTR_KEEP_IBUF_BITMAP. */
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip
|| page_zip_validate(page_zip, block->page.frame,
@@ -4564,11 +4268,6 @@ btr_cur_pessimistic_update(
btr_cur_write_sys(new_entry, index, trx_id, roll_ptr);
}
const ulint max_ins_size = page_zip
? 0
: page_get_max_insert_size_after_reorganize(block->page.frame,
1);
if (UNIV_UNLIKELY(is_metadata)) {
ut_ad(new_entry->is_metadata());
ut_ad(index->is_instant());
@@ -4653,18 +4352,6 @@ btr_cur_pessimistic_update(
rec_offs_make_valid(page_cursor->rec, index,
true, *offsets);
}
} else if (!dict_index_is_clust(index)
&& page_is_leaf(block->page.frame)) {
/* Update the free bits in the insert buffer.
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
if (page_zip) {
ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
} else if (!index->table->is_temporary()) {
ibuf_update_free_bits_low(block, max_ins_size,
mtr);
}
}
if (!srv_read_only_mode
@@ -4686,16 +4373,7 @@ btr_cur_pessimistic_update(
of a badly-compressing record, it is possible for
btr_cur_optimistic_update() to return DB_UNDERFLOW and
btr_cur_insert_if_possible() to return FALSE. */
ut_a(page_zip || optim_err != DB_UNDERFLOW);
/* Out of space: reset the free bits.
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
if (!dict_index_is_clust(index)
&& !index->table->is_temporary()
&& page_is_leaf(block->page.frame)) {
ibuf_reset_free_bits(block);
}
ut_ad(page_zip || optim_err != DB_UNDERFLOW);
}
if (big_rec_vec != NULL) {
@@ -4740,8 +4418,7 @@ btr_cur_pessimistic_update(
same temp-table in parallel.
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (dict_index_is_sec_or_ibuf(index)
&& !index->table->is_temporary()) {
if (!index->is_primary() && !index->table->is_temporary()) {
/* Update PAGE_MAX_TRX_ID in the index page header.
It was not updated by btr_cur_pessimistic_insert()
because of BTR_NO_LOCKING_FLAG. */
@@ -5052,9 +4729,6 @@ btr_cur_optimistic_delete(
}
{
page_t* page = buf_block_get_frame(block);
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
if (UNIV_UNLIKELY(rec_get_info_bits(rec, page_rec_is_comp(rec))
& REC_INFO_MIN_REC_FLAG)) {
/* This should be rolling back instant ADD COLUMN.
@@ -5063,7 +4737,7 @@ btr_cur_optimistic_delete(
insert into SYS_COLUMNS is rolled back. */
ut_ad(cursor->index()->table->supports_instant());
ut_ad(cursor->index()->is_primary());
ut_ad(!page_zip);
ut_ad(!buf_block_get_page_zip(block));
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
offsets, mtr);
/* We must empty the PAGE_FREE list, because
@@ -5081,40 +4755,8 @@ btr_cur_optimistic_delete(
btr_search_update_hash_on_delete(cursor);
}
if (page_zip) {
#ifdef UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page,
cursor->index()));
#endif /* UNIV_ZIP_DEBUG */
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page,
cursor->index()));
#endif /* UNIV_ZIP_DEBUG */
/* On compressed pages, the IBUF_BITMAP_FREE
space is not affected by deleting (purging)
records, because it is defined as the minimum
of space available *without* reorganize, and
space available in the modification log. */
} else {
const ulint max_ins
= page_get_max_insert_size_after_reorganize(
page, 1);
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
offsets, mtr);
/* The change buffer does not handle inserts
into non-leaf pages, into clustered indexes,
or into the change buffer. */
if (!cursor->index()->is_clust()
&& !cursor->index()->table->is_temporary()
&& !dict_index_is_ibuf(cursor->index())) {
ibuf_update_free_bits_low(block, max_ins, mtr);
}
}
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
offsets, mtr);
}
func_exit:
@@ -5310,9 +4952,9 @@ discard_page:
goto err_exit;
}
btr_cur_t cursor;
cursor.page_cur.index = index;
cursor.page_cur.block = block;
btr_cur_t cur;
cur.page_cur.index = index;
cur.page_cur.block = block;
if (!page_has_prev(page)) {
/* If we delete the leftmost node pointer on a
@@ -5328,16 +4970,17 @@ discard_page:
rec_offs* offsets;
ulint len;
rtr_page_get_father_block(NULL, heap, mtr, NULL,
&cursor);
father_rec = btr_cur_get_rec(&cursor);
rtr_page_get_father_block(nullptr, heap, nullptr,
&cur,
cursor->rtr_info->thr, mtr);
father_rec = btr_cur_get_rec(&cur);
offsets = rec_get_offsets(father_rec, index, NULL,
0, ULINT_UNDEFINED, &heap);
rtr_read_mbr(rec_get_nth_field(
father_rec, offsets, 0, &len), &father_mbr);
rtr_update_mbr_field(&cursor, offsets, NULL,
rtr_update_mbr_field(&cur, offsets, NULL,
page, &father_mbr, next_rec, mtr);
ut_d(parent_latched = true);
} else {
@@ -5345,12 +4988,12 @@ discard_page:
on a page, we have to change the parent node pointer
so that it is equal to the new leftmost node pointer
on the page */
ret = btr_page_get_father(mtr, &cursor);
ret = btr_page_get_father(mtr, &cur);
if (!ret) {
*err = DB_CORRUPTION;
goto err_exit;
}
*err = btr_cur_node_ptr_delete(&cursor, mtr);
*err = btr_cur_node_ptr_delete(&cur, mtr);
if (*err != DB_SUCCESS) {
got_err:
ret = FALSE;
@@ -5397,7 +5040,10 @@ got_err:
#endif /* UNIV_ZIP_DEBUG */
ut_ad(!parent_latched
|| btr_check_node_ptr(index, block, mtr));
|| btr_check_node_ptr(index, block,
cursor->rtr_info
? cursor->rtr_info->thr
: nullptr, mtr));
if (!ret && btr_cur_compress_recommendation(cursor, mtr)) {
if (UNIV_LIKELY(allow_merge)) {
@@ -5544,7 +5190,7 @@ public:
ulint parent_savepoint= m_savepoint;
m_savepoint= mtr_set_savepoint(&mtr);
m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level,
m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH,
&mtr, nullptr);
if (parent_block && parent_block != right_parent)
@@ -5762,8 +5408,7 @@ static ha_rows btr_estimate_n_rows_in_range_on_level(
savepoint= mtr_set_savepoint(&mtr);
/* Fetch the page. */
block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr,
nullptr);
block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, &mtr, nullptr);
if (prev_block)
mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block);
@@ -6378,7 +6023,7 @@ struct btr_blob_log_check_t {
if (UNIV_UNLIKELY(page_no != FIL_NULL)) {
m_pcur->btr_cur.page_cur.block = btr_block_get(
*index, page_no, RW_X_LATCH, false, m_mtr);
*index, page_no, RW_X_LATCH, m_mtr);
/* The page should not be evicted or corrupted while
we are holding a buffer-fix on it. */
m_pcur->btr_cur.page_cur.block->page.unfix();

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved.
Copyright (C) 2014, 2022, MariaDB Corporation.
Copyright (C) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com
#include "dict0stats.h"
#include "dict0stats_bg.h"
#include "dict0defrag_bg.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "srv0start.h"
#include "mysqld.h"
@@ -330,20 +329,16 @@ btr_defragment_merge_pages(
// If max_ins_size >= move_size, we can move the records without
// reorganizing the page, otherwise we need to reorganize the page
// first to release more space.
if (move_size > max_ins_size) {
dberr_t err = btr_page_reorganize_block(page_zip_level,
to_block, index, mtr);
if (err != DB_SUCCESS) {
if (!dict_index_is_clust(index)
&& page_is_leaf(to_page)) {
ibuf_reset_free_bits(to_block);
}
// If reorganization fails, that means page is
// not compressible. There's no point to try
// merging into this page. Continue to the
// next page.
return err == DB_FAIL ? from_block : nullptr;
}
if (move_size <= max_ins_size) {
} else if (dberr_t err = btr_page_reorganize_block(page_zip_level,
to_block, index,
mtr)) {
// If reorganization fails, that means page is
// not compressible. There's no point to try
// merging into this page. Continue to the
// next page.
return err == DB_FAIL ? from_block : nullptr;
} else {
ut_ad(page_validate(to_page, index));
max_ins_size = page_get_max_insert_size(to_page, n_recs);
if (max_ins_size < move_size) {
@@ -392,18 +387,6 @@ btr_defragment_merge_pages(
&& *max_data_size > new_data_size + move_size) {
*max_data_size = new_data_size + move_size;
}
// Set ibuf free bits if necessary.
if (!dict_index_is_clust(index)
&& page_is_leaf(to_page)) {
if (zip_size) {
ibuf_reset_free_bits(to_block);
} else {
ibuf_update_free_bits_if_full(
to_block,
srv_page_size,
ULINT_UNDEFINED);
}
}
btr_cur_t parent;
parent.page_cur.index = index;
parent.page_cur.block = from_block;
@@ -526,8 +509,7 @@ btr_defragment_n_pages(
break;
}
blocks[i] = btr_block_get(*index, page_no, RW_X_LATCH, true,
mtr);
blocks[i] = btr_block_get(*index, page_no, RW_X_LATCH, mtr);
if (!blocks[i]) {
return nullptr;
}
@@ -542,7 +524,7 @@ btr_defragment_n_pages(
/* given page is the last page.
Lift the records to father. */
dberr_t err;
btr_lift_page_up(index, block, mtr, &err);
btr_lift_page_up(index, block, nullptr, mtr, &err);
}
return NULL;
}

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2022, MariaDB Corporation.
Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -157,20 +157,14 @@ before_first:
cursor->rel_pos = BTR_PCUR_ON;
}
if (index->is_ibuf()) {
ut_ad(!index->table->not_redundant());
cursor->old_n_fields = uint16_t(rec_get_n_fields_old(rec));
} else {
cursor->old_n_fields = static_cast<uint16>(
dict_index_get_n_unique_in_tree(index));
if (index->is_spatial() && !page_rec_is_leaf(rec)) {
ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index)
== DICT_INDEX_SPATIAL_NODEPTR_SIZE);
/* For R-tree, we have to compare
the child page numbers as well. */
cursor->old_n_fields
= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
}
cursor->old_n_fields = static_cast<uint16>(
dict_index_get_n_unique_in_tree(index));
if (index->is_spatial() && !page_rec_is_leaf(rec)) {
ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index)
== DICT_INDEX_SPATIAL_NODEPTR_SIZE);
/* For R-tree, we have to compare
the child page numbers as well. */
cursor->old_n_fields = DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
}
cursor->old_n_core_fields = index->n_core_fields;
@@ -476,8 +470,7 @@ btr_pcur_move_to_next_page(
dberr_t err;
buf_block_t* next_block = btr_block_get(
*cursor->index(), next_page_no, mode,
page_is_leaf(page), mtr, &err);
*cursor->index(), next_page_no, mode, mtr, &err);
if (UNIV_UNLIKELY(!next_block)) {
return err;

View File

@@ -2,7 +2,7 @@
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -305,13 +305,6 @@ static void btr_search_info_update_hash(btr_search_t *info, btr_cur_t *cursor)
dict_index_t* index = cursor->index();
int cmp;
if (dict_index_is_ibuf(index)) {
/* So many deletes are performed on an insert buffer tree
that we do not consider a hash index useful on it: */
return;
}
uint16_t n_unique = dict_index_get_n_unique_in_tree(index);
if (info->n_hash_potential == 0) {
@@ -712,7 +705,6 @@ btr_search_update_hash_ref(
ut_ad(block->page.id().space() == index->table->space_id);
ut_ad(index == cursor->index());
ut_ad(!dict_index_is_ibuf(index));
auto part = btr_search_sys.get_part(*index);
part->latch.wr_lock(SRW_LOCK_CALL);
ut_ad(!block->index || block->index == index);
@@ -1062,7 +1054,6 @@ btr_search_guess_on_hash(
return false;
}
ut_ad(!index->is_ibuf());
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH});
compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH});
@@ -1269,7 +1260,6 @@ retry:
ut_ad(block->page.id().space() == index->table->space_id);
ut_a(index_id == index->id);
ut_ad(!dict_index_is_ibuf(index));
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
@@ -1472,7 +1462,6 @@ btr_search_build_page_hash_index(
ut_ad(ahi_latch == &btr_search_sys.get_part(*index)->latch);
ut_ad(index);
ut_ad(block->page.id().space() == index->table->space_id);
ut_ad(!dict_index_is_ibuf(index));
ut_ad(page_is_leaf(block->page.frame));
ut_ad(block->page.lock.have_x() || block->page.lock.have_s());
@@ -1798,7 +1787,6 @@ void btr_search_update_hash_on_delete(btr_cur_t *cursor)
ut_ad(block->page.id().space() == index->table->space_id);
ut_a(index == cursor->index());
ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0);
ut_ad(!dict_index_is_ibuf(index));
rec = btr_cur_get_rec(cursor);
@@ -1871,7 +1859,6 @@ void btr_search_update_hash_node_on_insert(btr_cur_t *cursor,
}
ut_a(cursor->index() == index);
ut_ad(!dict_index_is_ibuf(index));
ahi_latch->wr_lock(SRW_LOCK_CALL);
if (!block->index || !btr_search_enabled) {
@@ -1964,7 +1951,6 @@ drop:
}
ut_a(index == cursor->index());
ut_ad(!dict_index_is_ibuf(index));
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
@@ -2213,7 +2199,6 @@ btr_search_hash_table_validate(ulint hash_table_id)
invokes btr_search_drop_page_hash_index(). */
ut_a(block->page.state() == buf_page_t::REMOVE_HASH);
state_ok:
ut_ad(!dict_index_is_ibuf(block->index));
ut_ad(block->page.id().space()
== block->index->table->space_id);

View File

@@ -2,7 +2,7 @@
Copyright (c) 1995, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -50,7 +50,6 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0dblwr.h"
#include "lock0lock.h"
#include "btr0sea.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "log0log.h"
@@ -1820,9 +1819,6 @@ calc_buf_pool_size:
" and dictionary.";
}
/* normalize ibuf.max_size */
ibuf_max_size_update(srv_change_buffer_max_size);
if (srv_buf_pool_old_size != srv_buf_pool_size) {
buf_resize_status("Completed resizing buffer pool from %zu to %zu bytes."
@@ -1896,7 +1892,6 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked());
ut_ad(bpage == buf_pool.page_hash.get(id, chain));
ut_ad(!buf_pool.watch_is_sentinel(*bpage));
ut_d(const auto state= bpage->state());
ut_ad(state >= buf_page_t::FREED);
ut_ad(state <= buf_page_t::READ_FIX);
@@ -1940,135 +1935,6 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
buf_pool.page_hash.replace(chain, bpage, dpage);
}
/** Register a watch for a page identifier.
The caller must hold an exclusive page hash latch on chain. While a
watch[] element is being allocated, that latch is released and later
reacquired, so the contents of the chain may change during the call.
@param id     page identifier
@param chain  hash table chain with exclusively held page_hash
@return a buffer pool block corresponding to id
@retval nullptr if the block was not present, and a watch was installed */
inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
                                         buf_pool_t::hash_chain &chain)
{
  ut_ad(&chain == &page_hash.cell_get(id.fold()));
  ut_ad(page_hash.lock_get(chain).is_write_locked());

retry:
  if (buf_page_t *bpage= page_hash.get(id, chain))
  {
    if (!watch_is_sentinel(*bpage))
      /* The page was loaded meanwhile. */
      return bpage;
    /* Add to an existing watch. */
    bpage->fix();
    return nullptr;
  }

  /* Nothing is registered for id. Release the page hash latch before
  acquiring the mutex that protects watch[]. */
  page_hash.lock_get(chain).unlock();
  /* Allocate a watch[] and then try to insert it into the page_hash. */
  mysql_mutex_lock(&mutex);

  /* The maximum number of purge tasks should never exceed
  the UT_ARR_SIZE(watch) - 1, and there is no way for a purge task to hold a
  watch when setting another watch. */

  /* Scan watch[] from the last element down to the first one.
  The pointer is decremented at the top of the body, so that it never
  moves before &watch[0], even if every element turns out to be in use. */
  for (buf_page_t *w= &watch[UT_ARR_SIZE(watch)]; w != watch; )
  {
    --w;
    ut_ad(w->access_time == 0);
    ut_ad(!w->oldest_modification());
    ut_ad(!w->zip.data);
    ut_ad(!w->in_zip_hash);
    static_assert(buf_page_t::NOT_USED == 0, "efficiency");
    if (ut_d(auto s=) w->state())
    {
      /* This watch may be in use for some other page. */
      ut_ad(s >= buf_page_t::UNFIXED);
      continue;
    }
    /* w is pointing to watch[], which is protected by mutex.
    Normally, buf_page_t::id for objects that are reachable by
    page_hash.get(id, chain) are protected by hash_lock. */
    w->set_state(buf_page_t::UNFIXED + 1);
    w->id_= id;

    /* The page hash latch was not held for a while; check whether
    something was registered for id in the meantime. */
    buf_page_t *bpage= page_hash.get(id, chain);
    if (UNIV_LIKELY_NULL(bpage))
    {
      /* Give the element back and start over with the latch held. */
      w->set_state(buf_page_t::NOT_USED);
      page_hash.lock_get(chain).lock();
      mysql_mutex_unlock(&mutex);
      goto retry;
    }

    page_hash.lock_get(chain).lock();
    ut_ad(w->state() == buf_page_t::UNFIXED + 1);
    buf_pool.page_hash.append(chain, w);
    mysql_mutex_unlock(&mutex);
    return nullptr;
  }

  /* All elements of watch[] are in use; see the comment above the loop
  for why this is not expected to happen. */
  ut_error;
  mysql_mutex_unlock(&mutex);
  return nullptr;
}
/** Stop watching whether a page has been read in.
watch_set(id) must have returned nullptr before.
The reference acquired for the watch is released. If the entry found
for id is still a watch[] sentinel and this was its only reference,
the sentinel is removed from page_hash and marked NOT_USED.
@param id page identifier
@param chain unlocked hash table chain */
TRANSACTIONAL_TARGET
void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain)
{
mysql_mutex_assert_not_owner(&mutex);
buf_page_t *w;
/* First attempt: use only the page hash latch, without acquiring mutex. */
{
transactional_lock_guard<page_hash_latch> g{page_hash.lock_get(chain)};
/* The page must exist because watch_set() did fix(). */
w= page_hash.get(id, chain);
ut_ad(w->in_page_hash);
if (!watch_is_sentinel(*w))
{
/* Either the entry is not a sentinel, or (when entered via goto)
the sentinel state differs from UNFIXED + 1. Only drop our
reference; w=nullptr makes the function return right after this scope. */
no_watch:
w->unfix();
w= nullptr;
}
else
{
const auto state= w->state();
ut_ad(~buf_page_t::LRU_MASK & state);
ut_ad(state >= buf_page_t::UNFIXED + 1);
if (state != buf_page_t::UNFIXED + 1)
goto no_watch;
/* state == UNFIXED + 1: leave w set, so that the sentinel will be
released below while holding mutex. */
}
}
if (!w)
return;
/* Remember the sentinel that was seen above. The lookup is repeated
below, and the result is compared against this pointer in case the
entry for id changed in between. */
const auto old= w;
/* The following is based on buf_pool_t::watch_remove(). */
mysql_mutex_lock(&mutex);
w= page_hash.get(id, chain);
{
transactional_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
auto f= w->unfix();
ut_ad(f < buf_page_t::READ_FIX || w != old);
/* Free the sentinel only if it is still the one found earlier and
unfix() reported buf_page_t::UNFIXED. */
if (f == buf_page_t::UNFIXED && w == old)
{
page_hash.remove(chain, w);
// Now that w is detached from page_hash, release it to watch[].
ut_ad(w->id_ == id);
ut_ad(!w->frame);
ut_ad(!w->zip.data);
w->set_state(buf_page_t::NOT_USED);
}
}
mysql_mutex_unlock(&mutex);
}
/** Mark the page status as FREED for the given tablespace and page number.
@param[in,out] space tablespace
@param[in] page page number
@@ -2150,7 +2016,7 @@ lookup:
if (hash_lock.is_locked())
xabort();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
if (!bpage)
{
xend();
goto must_read_page;
@@ -2175,7 +2041,7 @@ lookup:
{
hash_lock.lock_shared();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
if (!bpage)
{
hash_lock.unlock_shared();
goto must_read_page;
@@ -2348,13 +2214,9 @@ err_exit:
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
or BUF_PEEK_IF_IN_POOL
@param[in] mtr mini-transaction
@param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge to happen
while reading the page from file
then it makes sure that it does merging of change buffer changes while
reading the page from file.
@return pointer to the block or NULL */
TRANSACTIONAL_TARGET
buf_block_t*
@@ -2365,10 +2227,8 @@ buf_page_get_low(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err,
bool allow_ibuf_merge)
dberr_t* err)
{
unsigned access_time;
ulint retries = 0;
ut_ad(!mtr || mtr->is_active());
@@ -2377,11 +2237,6 @@ buf_page_get_low(
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad(!allow_ibuf_merge
|| mode == BUF_GET
|| mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
if (err) {
*err = DB_SUCCESS;
@@ -2399,7 +2254,6 @@ buf_page_get_low(
case BUF_GET_POSSIBLY_FREED:
break;
case BUF_GET:
case BUF_GET_IF_IN_POOL_OR_WATCH:
ut_ad(!mtr->is_freeing_tree());
fil_space_t* s = fil_space_get(page_id.space());
ut_ad(s);
@@ -2407,9 +2261,6 @@ buf_page_get_low(
}
#endif /* UNIV_DEBUG */
ut_ad(!mtr || !ibuf_inside(mtr)
|| ibuf_page_low(page_id, zip_size, FALSE, NULL));
++buf_pool.stat.n_page_gets;
auto& chain= buf_pool.page_hash.cell_get(page_id.fold());
@@ -2442,8 +2293,7 @@ loop:
hash_lock.lock_shared();
block = reinterpret_cast<buf_block_t*>(
buf_pool.page_hash.get(page_id, chain));
if (UNIV_LIKELY(block
&& !buf_pool.watch_is_sentinel(block->page))) {
if (UNIV_LIKELY(block != nullptr)) {
state = block->page.fix();
hash_lock.unlock_shared();
goto got_block;
@@ -2454,20 +2304,6 @@ loop:
switch (mode) {
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
return nullptr;
case BUF_GET_IF_IN_POOL_OR_WATCH:
/* We cannot easily use a memory transaction here. */
hash_lock.lock();
block = reinterpret_cast<buf_block_t*>
(buf_pool.watch_set(page_id, chain));
/* buffer-fixing will prevent eviction */
state = block ? block->page.fix() : 0;
hash_lock.unlock();
if (block) {
goto got_block;
}
return nullptr;
}
@@ -2495,7 +2331,7 @@ loop:
return nullptr;
}
} else {
buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
buf_read_ahead_random(page_id, zip_size);
}
ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
@@ -2603,7 +2439,6 @@ wait_for_unfix:
switch (state) {
case buf_page_t::UNFIXED + 1:
case buf_page_t::IBUF_EXIST + 1:
case buf_page_t::REINIT + 1:
break;
default:
@@ -2657,13 +2492,6 @@ wait_for_unfix:
buf_pool.n_pend_unzip++;
access_time = block->page.is_accessed();
if (!access_time && !recv_no_ibuf_operations
&& ibuf_page_exists(block->page.id(), block->zip_size())) {
state = buf_page_t::IBUF_EXIST + 1;
}
/* Decompress the page while not holding
buf_pool.mutex. */
auto ok = buf_zip_decompress(block, false);
@@ -2683,55 +2511,6 @@ wait_for_unfix:
}
}
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
re_evict:
if (mode != BUF_GET_IF_IN_POOL
&& mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
} else if (!ibuf_debug || recv_recovery_is_on()) {
} else if (fil_space_t* space = fil_space_t::get(page_id.space())) {
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
mysql_mutex_lock(&buf_pool.mutex);
block->unfix();
/* Blocks cannot be relocated or enter or exit the
buf_pool while we are holding the buf_pool.mutex. */
const bool evicted = buf_LRU_free_page(&block->page, true);
space->release();
if (evicted) {
page_hash_latch& hash_lock
= buf_pool.page_hash.lock_get(chain);
hash_lock.lock();
mysql_mutex_unlock(&buf_pool.mutex);
/* We may set the watch, as it would have
been set if the page were not in the
buffer pool in the first place. */
block= reinterpret_cast<buf_block_t*>(
mode == BUF_GET_IF_IN_POOL_OR_WATCH
? buf_pool.watch_set(page_id, chain)
: buf_pool.page_hash.get(page_id, chain));
hash_lock.unlock();
return(NULL);
}
block->fix();
mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_sync();
state = block->page.state();
if (state == buf_page_t::UNFIXED + 1
&& !block->page.oldest_modification()) {
goto re_evict;
}
/* Failed to evict the page; change it directly */
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
ut_ad(state > buf_page_t::FREED);
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) {
goto ignore_block;
@@ -2744,118 +2523,69 @@ re_evict:
#endif /* UNIV_DEBUG */
ut_ad(block->page.frame);
if (state >= buf_page_t::UNFIXED
&& allow_ibuf_merge
&& fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX
&& page_is_leaf(block->page.frame)) {
block->page.lock.x_lock();
ut_ad(block->page.id() == page_id
|| (state >= buf_page_t::READ_FIX
&& state < buf_page_t::WRITE_FIX));
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
dberr_t e;
mtr_memo_type_t fix_type;
switch (rw_latch) {
case RW_NO_LATCH:
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
return block;
case RW_S_LATCH:
fix_type = MTR_MEMO_PAGE_S_FIX;
block->page.lock.s_lock();
ut_ad(!block->page.is_read_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
block->page.lock.s_unlock();
block->page.lock.x_lock();
page_id_mismatch:
state = block->page.state();
e = DB_CORRUPTION;
ibuf_merge_corrupted:
if (err) {
*err = e;
}
if (block->page.id().is_corrupted()) {
buf_pool.corrupted_evict(&block->page, state);
buf_pool.corrupted_evict(&block->page,
block->page.state());
}
if (err) {
*err = DB_CORRUPTION;
}
return nullptr;
}
state = block->page.state();
ut_ad(state < buf_page_t::READ_FIX);
if (state >= buf_page_t::IBUF_EXIST
&& state < buf_page_t::REINIT) {
block->page.clear_ibuf_exist();
e = ibuf_merge_or_delete_for_page(block, page_id,
block->zip_size());
if (UNIV_UNLIKELY(e != DB_SUCCESS)) {
goto ibuf_merge_corrupted;
}
break;
case RW_SX_LATCH:
fix_type = MTR_MEMO_PAGE_SX_FIX;
block->page.lock.u_lock();
ut_ad(!block->page.is_io_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
block->page.lock.u_x_upgrade();
goto page_id_mismatch;
}
if (rw_latch == RW_X_LATCH) {
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
goto got_latch;
} else {
block->page.lock.x_unlock();
goto get_latch;
}
} else {
get_latch:
switch (rw_latch) {
mtr_memo_type_t fix_type;
case RW_NO_LATCH:
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
fix_type = MTR_MEMO_PAGE_X_FIX;
if (block->page.lock.x_lock_upgraded()) {
ut_ad(block->page.id() == page_id);
block->unfix();
mtr->page_lock_upgrade(*block);
return block;
case RW_S_LATCH:
fix_type = MTR_MEMO_PAGE_S_FIX;
block->page.lock.s_lock();
ut_ad(!block->page.is_read_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
block->page.lock.s_unlock();
block->page.lock.x_lock();
goto page_id_mismatch;
}
get_latch_valid:
mtr->memo_push(block, fix_type);
}
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
goto page_id_mismatch;
}
}
mtr->memo_push(block, fix_type);
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
break;
case RW_SX_LATCH:
fix_type = MTR_MEMO_PAGE_SX_FIX;
block->page.lock.u_lock();
ut_ad(!block->page.is_io_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
block->page.lock.u_x_upgrade();
goto page_id_mismatch;
}
goto get_latch_valid;
default:
ut_ad(rw_latch == RW_X_LATCH);
fix_type = MTR_MEMO_PAGE_X_FIX;
if (block->page.lock.x_lock_upgraded()) {
ut_ad(block->page.id() == page_id);
block->unfix();
mtr->page_lock_upgrade(*block);
return block;
}
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
goto page_id_mismatch;
}
goto get_latch_valid;
}
got_latch:
ut_ad(page_id_t(page_get_space_id(block->page.frame),
page_get_page_no(block->page.frame))
== page_id);
ut_ad(page_id_t(page_get_space_id(block->page.frame),
page_get_page_no(block->page.frame)) == page_id);
if (mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_PEEK_IF_IN_POOL) {
return block;
}
if (mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL) {
return block;
}
const bool not_first_access{block->page.set_accessed()};
buf_page_make_young_if_needed(&block->page);
if (!not_first_access) {
buf_read_ahead_linear(page_id, block->zip_size(),
ibuf_inside(mtr));
}
const bool not_first_access{block->page.set_accessed()};
buf_page_make_young_if_needed(&block->page);
if (!not_first_access) {
buf_read_ahead_linear(page_id, block->zip_size());
}
return block;
@@ -2867,11 +2597,9 @@ got_latch:
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL
@param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge while
reading the pages from file.
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
@@ -2881,8 +2609,7 @@ buf_page_get_gen(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err,
bool allow_ibuf_merge)
dberr_t* err)
{
if (buf_block_t *block= recv_sys.recover(page_id))
{
@@ -2899,57 +2626,20 @@ buf_page_get_gen(
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (err)
*err= DB_SUCCESS;
const bool must_merge= allow_ibuf_merge &&
ibuf_page_exists(page_id, block->zip_size());
if (s < buf_page_t::UNFIXED)
{
got_freed_page:
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
block->page.unfix();
goto corrupted;
}
else if (must_merge &&
fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
page_is_leaf(block->page.frame))
{
block->page.lock.x_lock();
s= block->page.state();
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
if (s < buf_page_t::UNFIXED)
{
block->page.lock.x_unlock();
goto got_freed_page;
}
else
{
if (block->page.is_ibuf_exist())
block->page.clear_ibuf_exist();
if (dberr_t e=
ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
{
if (err)
*err= e;
buf_pool.corrupted_evict(&block->page, s);
return nullptr;
}
}
if (rw_latch == RW_X_LATCH)
{
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
return block;
}
block->page.lock.x_unlock();
}
if (err)
*err= DB_SUCCESS;
mtr->page_lock(block, rw_latch);
return block;
}
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err, allow_ibuf_merge);
guess, mode, mtr, err);
}
/********************************************************************//**
@@ -3014,7 +2704,6 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
{
ut_ad(rw_latch == RW_S_LATCH || !block->page.is_io_fixed());
ut_ad(id == block->page.id());
ut_ad(!ibuf_inside(mtr) || ibuf_page(id, block->zip_size(), nullptr));
if (modify_clock != block->modify_clock || block->page.is_freed())
{
@@ -3110,12 +2799,11 @@ retry:
buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain);
if (bpage && !buf_pool.watch_is_sentinel(*bpage))
if (bpage)
{
#ifdef BTR_CUR_HASH_ADAPT
const dict_index_t *drop_hash_entry= nullptr;
#endif
bool ibuf_exist= false;
if (!mtr->have_x_latch(reinterpret_cast<const buf_block_t&>(*bpage)))
{
@@ -3141,10 +2829,7 @@ retry:
if (state < buf_page_t::UNFIXED)
bpage->set_reinit(buf_page_t::FREED);
else
{
bpage->set_reinit(state & buf_page_t::LRU_MASK);
ibuf_exist= (state & buf_page_t::LRU_MASK) == buf_page_t::IBUF_EXIST;
}
if (UNIV_LIKELY(bpage->frame != nullptr))
{
@@ -3170,10 +2855,7 @@ retry:
if (state < buf_page_t::UNFIXED)
bpage->set_reinit(buf_page_t::FREED);
else
{
bpage->set_reinit(state & buf_page_t::LRU_MASK);
ibuf_exist= (state & buf_page_t::LRU_MASK) == buf_page_t::IBUF_EXIST;
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_relocate(bpage, &free_block->page);
@@ -3213,9 +2895,6 @@ retry:
false);
#endif /* BTR_CUR_HASH_ADAPT */
if (ibuf_exist && !recv_recovery_is_on())
ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size);
return reinterpret_cast<buf_block_t*>(bpage);
}
@@ -3256,13 +2935,6 @@ retry:
bpage->set_accessed();
buf_pool.stat.n_pages_created++;
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
if (page_id < page_id_t{SRV_SPACE_ID_UPPER_BOUND, 0} &&
!srv_is_undo_tablespace(page_id.space()) &&
!recv_recovery_is_on())
ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size);
static_assert(FIL_PAGE_PREV + 4 == FIL_PAGE_NEXT, "adjacent");
memset_aligned<8>(bpage->frame + FIL_PAGE_PREV, 0xff, 8);
mach_write_to_2(bpage->frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
@@ -3326,32 +2998,15 @@ ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read)
const byte* frame = bpage.zip.data ? bpage.zip.data : bpage.frame;
switch (fil_page_get_type(frame)) {
ulint level;
case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
level = btr_page_get_level(frame);
/* Check if it is an index page for insert buffer */
if (fil_page_get_type(frame) == FIL_PAGE_INDEX
&& btr_page_get_index_id(frame)
== (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) {
if (level == 0) {
counter = MONITOR_RW_COUNTER(
read, MONITOR_INDEX_IBUF_LEAF_PAGE);
} else {
counter = MONITOR_RW_COUNTER(
read,
MONITOR_INDEX_IBUF_NON_LEAF_PAGE);
}
if (page_is_leaf(frame)) {
counter = MONITOR_RW_COUNTER(
read, MONITOR_INDEX_LEAF_PAGE);
} else {
if (level == 0) {
counter = MONITOR_RW_COUNTER(
read, MONITOR_INDEX_LEAF_PAGE);
} else {
counter = MONITOR_RW_COUNTER(
read, MONITOR_INDEX_NON_LEAF_PAGE);
}
counter = MONITOR_RW_COUNTER(
read, MONITOR_INDEX_NON_LEAF_PAGE);
}
break;
@@ -3363,14 +3018,6 @@ ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read)
counter = MONITOR_RW_COUNTER(read, MONITOR_INODE_PAGE);
break;
case FIL_PAGE_IBUF_FREE_LIST:
counter = MONITOR_RW_COUNTER(read, MONITOR_IBUF_FREELIST_PAGE);
break;
case FIL_PAGE_IBUF_BITMAP:
counter = MONITOR_RW_COUNTER(read, MONITOR_IBUF_BITMAP_PAGE);
break;
case FIL_PAGE_TYPE_SYS:
counter = MONITOR_RW_COUNTER(read, MONITOR_SYSTEM_PAGE);
break;
@@ -3603,25 +3250,16 @@ release_page:
if (recovery && !recv_recover_page(node.space, this))
return DB_PAGE_CORRUPTED;
const bool ibuf_may_exist= frame && !recv_no_ibuf_operations &&
(!expected_id.space() || !is_predefined_tablespace(expected_id.space())) &&
fil_page_get_type(read_frame) == FIL_PAGE_INDEX &&
page_is_leaf(read_frame);
if (UNIV_UNLIKELY(MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)))
buf_page_monitor(*this, true);
DBUG_PRINT("ib_buf", ("read page %u:%u", id().space(), id().page_no()));
if (!recovery)
{
ut_d(auto f=) zip.fix.fetch_sub(ibuf_may_exist
? READ_FIX - IBUF_EXIST
: READ_FIX - UNFIXED);
ut_d(auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED);
ut_ad(f >= READ_FIX);
ut_ad(f < WRITE_FIX);
}
else if (ibuf_may_exist)
set_ibuf_exist();
lock.x_unlock(true);

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
Copyright (c) 2013, 2014, Fusion-io
This program is free software; you can redistribute it and/or modify it under
@@ -889,7 +889,7 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru)
const buf_page_t *bpage=
buf_pool.page_hash.get(id, buf_pool.page_hash.cell_get(fold));
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
if (!bpage)
return false;
/* We avoid flushing 'non-old' blocks in an LRU flush, because the
@@ -1066,8 +1066,7 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
because the flushed blocks are soon freed */
if (!lru || id == page_id || bpage->is_old())
{
if (!buf_pool.watch_is_sentinel(*bpage) &&
bpage->oldest_modification() > 1 && bpage->ready_for_flush() &&
if (bpage->oldest_modification() > 1 && bpage->ready_for_flush() &&
bpage->flush(lru, space))
{
++count;
@@ -1174,7 +1173,7 @@ static void buf_flush_discard_page(buf_page_t *bpage)
ut_d(const auto state= bpage->state());
ut_ad(state == buf_page_t::FREED || state == buf_page_t::UNFIXED ||
state == buf_page_t::IBUF_EXIST || state == buf_page_t::REINIT);
state == buf_page_t::REINIT);
bpage->lock.u_unlock();
buf_LRU_free_page(bpage, true);

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2022, MariaDB Corporation.
Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,7 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0lru.h"
#include "buf0buddy.h"
#include "buf0dblwr.h"
#include "ibuf0ibuf.h"
#include "page0zip.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
@@ -48,74 +48,24 @@ read-ahead is not done: this is to prevent flooding the buffer pool with
i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
/** Release a watch sentinel so that a real block can be registered
in its place. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
The caller must own buf_pool.mutex and, unless running inside a memory
transaction, hold the page hash latch of chain in exclusive mode.
@param w      sentinel; must point to an element of watch[]
@param chain  locked hash table chain
@return the state that w had before it was reset to NOT_USED */
inline uint32_t buf_pool_t::watch_remove(buf_page_t *w,
                                         buf_pool_t::hash_chain &chain)
{
  mysql_mutex_assert_owner(&buf_pool.mutex);
  ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked());
  ut_ad(&watch[0] <= w);
  ut_ad(w < &watch[array_elements(watch)]);
  ut_ad(!w->zip.data);
  ut_ad(!w->in_zip_hash);

  /* Capture the state for the caller before marking the element free. */
  const uint32_t old_state{w->state()};
  ut_ad(old_state >= buf_page_t::UNFIXED);
  ut_ad(old_state < buf_page_t::READ_FIX);
  w->set_state(buf_page_t::NOT_USED);

  /* Bits outside LRU_MASK are set (presumably a nonzero buffer-fix
  count): the sentinel is registered in page_hash and must be detached. */
  const bool in_use= (old_state & ~buf_page_t::LRU_MASK) != 0;
  if (in_use)
  {
    page_hash.remove(chain, w);
  }

  ut_ad(!w->in_page_hash);
  /* Overwrite the page identifier with an all-ones value. */
  w->id_= page_id_t(~0ULL);
  return old_state;
}
/** Initialize a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
(2) if the tablespace has been or is being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] unzip whether the uncompressed page is
requested (for ROW_FORMAT=COMPRESSED)
@return pointer to the block
@retval NULL in case of an error */
TRANSACTIONAL_TARGET
static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
ulint zip_size, bool unzip)
static buf_page_t* buf_page_init_for_read(const page_id_t page_id,
ulint zip_size)
{
mtr_t mtr;
if (mode == BUF_READ_IBUF_PAGES_ONLY)
{
/* It is a read-ahead within an ibuf routine */
ut_ad(!ibuf_bitmap_page(page_id, zip_size));
ibuf_mtr_start(&mtr);
if (!recv_no_ibuf_operations && !ibuf_page(page_id, zip_size, &mtr))
{
ibuf_mtr_commit(&mtr);
return nullptr;
}
}
else
ut_ad(mode == BUF_READ_ANY_PAGE);
buf_page_t *bpage= nullptr;
buf_block_t *block= nullptr;
if (!zip_size || unzip || recv_recovery_is_on())
if (!zip_size || recv_recovery_is_on())
{
block= buf_LRU_get_free_block(false);
block->initialise(page_id, zip_size, buf_page_t::READ_FIX);
@@ -128,8 +78,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
mysql_mutex_lock(&buf_pool.mutex);
buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain);
if (hash_page && !buf_pool.watch_is_sentinel(*hash_page))
if (buf_pool.page_hash.get(page_id, chain))
{
/* The page is already in the buffer pool. */
if (block)
@@ -149,11 +98,6 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{
transactional_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
if (hash_page)
bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
(buf_page_t::READ_FIX - buf_page_t::UNFIXED));
buf_pool.page_hash.append(chain, &block->page);
}
@@ -191,9 +135,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru))
{
hash_page= buf_pool.page_hash.get(page_id, chain);
if (UNIV_UNLIKELY(hash_page && !buf_pool.watch_is_sentinel(*hash_page)))
if (UNIV_LIKELY_NULL(buf_pool.page_hash.get(page_id, chain)))
{
/* The block was added by some other thread. */
buf_buddy_free(data, zip_size);
@@ -213,11 +155,6 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{
transactional_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
if (hash_page)
bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
(buf_page_t::READ_FIX - buf_page_t::UNFIXED));
buf_pool.page_hash.append(chain, bpage);
}
@@ -228,13 +165,9 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
mysql_mutex_unlock(&buf_pool.mutex);
buf_pool.n_pend_reads++;
goto func_exit_no_mutex;
return bpage;
func_exit:
mysql_mutex_unlock(&buf_pool.mutex);
func_exit_no_mutex:
if (mode == BUF_READ_IBUF_PAGES_ONLY)
ibuf_mtr_commit(&mtr);
ut_ad(!bpage || bpage->in_file());
return bpage;
@@ -250,10 +183,8 @@ flag is cleared and the x-lock released by an i/o-handler thread.
to read from a non-existent tablespace
@param[in,out] space tablespace
@param[in] sync true if synchronous aio is desired
@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] unzip true=request uncompressed page
@return whether a read request was queued */
static
bool
@@ -261,10 +192,8 @@ buf_read_page_low(
dberr_t* err,
fil_space_t* space,
bool sync,
ulint mode,
const page_id_t page_id,
ulint zip_size,
bool unzip)
ulint zip_size)
{
buf_page_t* bpage;
@@ -279,25 +208,11 @@ nothing_read:
return false;
}
if (sync) {
} else if (trx_sys_hdr_page(page_id)
|| ibuf_bitmap_page(page_id, zip_size)
|| (!recv_no_ibuf_operations
&& ibuf_page(page_id, zip_size, nullptr))) {
/* Trx sys header is so low in the latching order that we play
safe and do not leave the i/o-completion to an asynchronous
i/o-thread. Change buffer pages must always be read with
synchronous i/o, to make sure they do not get involved in
thread deadlocks. */
sync = true;
}
/* The following call will also check if the tablespace does not exist
or is being dropped; if we succeed in initing the page in the buffer
pool for read, then DISCARD cannot proceed until the read has
completed */
bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip);
bpage = buf_page_init_for_read(page_id, zip_size);
if (bpage == NULL) {
goto nothing_read;
@@ -311,7 +226,7 @@ nothing_read:
DBUG_LOG("ib_buf",
"read page " << page_id << " zip_size=" << zip_size
<< " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
<< (sync ? " sync" : " async"));
void* dst = zip_size ? bpage->zip.data : bpage->frame;
const ulint len = zip_size ? zip_size : srv_page_size;
@@ -339,22 +254,15 @@ nothing_read:
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
mechanism is not activated. NOTE: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
end up waiting for these latches!
@param[in] page_id page id of a page which the current thread
wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
@return number of page read requests issued */
TRANSACTIONAL_TARGET
ulint
buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
{
if (!srv_random_read_ahead)
return 0;
@@ -363,11 +271,6 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
/* No read-ahead to avoid thread deadlocks */
return 0;
if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
/* If it is an ibuf bitmap page or trx sys hdr, we do no
read-ahead, as that could break the ibuf page access order */
return 0;
if (buf_pool.n_pend_reads > buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
return 0;
@@ -403,17 +306,14 @@ read_ahead:
goto no_read_ahead;
/* Read all the suitable blocks within the area */
const ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
for (page_id_t i= low; i < high; ++i)
{
if (ibuf_bitmap_page(i, zip_size))
continue;
if (space->is_stopping())
break;
dberr_t err;
space->reacquire();
if (buf_read_page_low(&err, space, false, ibuf_mode, i, zip_size, false))
if (buf_read_page_low(&err, space, false, i, zip_size))
count++;
}
@@ -454,8 +354,7 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
}
dberr_t err;
if (buf_read_page_low(&err, space, true, BUF_READ_ANY_PAGE,
page_id, zip_size, false))
if (buf_read_page_low(&err, space, true, page_id, zip_size))
srv_stats.buf_pool_reads.add(1);
buf_LRU_stat_inc_io();
@@ -474,8 +373,7 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
{
dberr_t err;
if (buf_read_page_low(&err, space, false, BUF_READ_ANY_PAGE,
page_id, zip_size, false)) {
if (buf_read_page_low(&err, space, false, page_id, zip_size)) {
srv_stats.buf_pool_reads.add(1);
}
@@ -506,16 +404,11 @@ only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@param[in] page_id page id; see NOTE 3 above
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether if we are inside ibuf routine
@return number of page read requests issued */
TRANSACTIONAL_TARGET
ulint
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
{
/* check if readahead is disabled */
if (!srv_read_ahead_threshold)
@@ -540,11 +433,6 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
/* This is not a border page of the area */
return 0;
if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
/* If it is an ibuf bitmap page or trx sys hdr, we do no
read-ahead, as that could break the ibuf page access order */
return 0;
fil_space_t *space= fil_space_t::get(page_id.space());
if (!space)
return 0;
@@ -628,17 +516,13 @@ failed:
/* If we got this far, read-ahead can be sensible: do it */
count= 0;
for (ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
new_low != new_high_1; ++new_low)
for (; new_low != new_high_1; ++new_low)
{
if (ibuf_bitmap_page(new_low, zip_size))
continue;
if (space->is_stopping())
break;
dberr_t err;
space->reacquire();
count+= buf_read_page_low(&err, space, false, ibuf_mode, new_low, zip_size,
false);
count+= buf_read_page_low(&err, space, false, new_low, zip_size);
}
if (count)
@@ -706,9 +590,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
dberr_t err;
space->reacquire();
buf_read_page_low(&err, space, false,
BUF_READ_ANY_PAGE, cur_page_id, zip_size,
true);
buf_read_page_low(&err, space, false, cur_page_id, zip_size);
if (err != DB_SUCCESS) {
sql_print_error("InnoDB: Recovery failed to read page "

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,13 +33,6 @@ const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] = {
0x80, 0, 0, 0, 0, 0, 0
};
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
charset-collation code for them. */
ulint data_mysql_default_charset_coll;
/*********************************************************************//**
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2022, MariaDB Corporation.
Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +30,6 @@ Created 4/18/1996 Heikki Tuuri
#include "dict0load.h"
#include "trx0trx.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "os0file.h"
@@ -233,12 +232,12 @@ dberr_t dict_boot()
dict_sys.create();
dberr_t err;
const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH,
const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_S_LATCH,
nullptr, BUF_GET, &mtr, &err);
if (!d) {
if (!d) {
mtr.commit();
return err;
}
}
heap = mem_heap_create(450);
@@ -420,10 +419,7 @@ dberr_t dict_boot()
mtr.commit();
err = ibuf_init_at_db_start();
if (err == DB_SUCCESS || srv_force_recovery >= SRV_FORCE_NO_DDL_UNDO) {
err = DB_SUCCESS;
if (err == DB_SUCCESS) {
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys.sys_tables);

View File

@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2016, 2022, MariaDB Corporation.
Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -214,9 +214,6 @@ Save defragmentation result.
@return DB_SUCCESS or error code */
dberr_t dict_stats_save_defrag_summary(dict_index_t *index, THD *thd)
{
if (index->is_ibuf())
return DB_SUCCESS;
MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr;
dict_table_t *table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false,
DICT_ERR_IGNORE_NONE);
@@ -336,8 +333,6 @@ dict_stats_save_defrag_stats(
/*============================*/
dict_index_t* index) /*!< in: index */
{
if (index->is_ibuf())
return DB_SUCCESS;
if (!index->is_readable())
return dict_stats_report_error(index->table, true);

View File

@@ -2,7 +2,7 @@
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1999,7 +1999,6 @@ dict_index_add_to_cache(
ut_ad(index->n_def == index->n_fields);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(!dict_index_is_online_ddl(index));
ut_ad(!dict_index_is_ibuf(index));
ut_d(mem_heap_validate(index->heap));
ut_a(!dict_index_is_clust(index)
@@ -2381,15 +2380,7 @@ dict_index_copy_types(
ulint n_fields) /*!< in: number of
field types to copy */
{
ulint i;
if (dict_index_is_ibuf(index)) {
dtuple_set_types_binary(tuple, n_fields);
return;
}
for (i = 0; i < n_fields; i++) {
for (ulint i = 0; i < n_fields; i++) {
const dict_field_t* ifield;
dtype_t* dfield_type;
@@ -2628,17 +2619,14 @@ dict_index_build_internal_non_clust(
ulint i;
ibool* indexed;
ut_ad(table && index);
ut_ad(!dict_index_is_clust(index));
ut_ad(!dict_index_is_ibuf(index));
ut_ad(!index->is_primary());
ut_ad(dict_sys.locked());
/* The clustered index should be the first in the list of indexes */
clust_index = UT_LIST_GET_FIRST(table->indexes);
ut_ad(clust_index);
ut_ad(dict_index_is_clust(clust_index));
ut_ad(!dict_index_is_ibuf(clust_index));
ut_ad(clust_index->is_clust());
/* Create a new index */
new_index = dict_mem_index_create(
@@ -3769,24 +3757,7 @@ dict_index_build_node_ptr(
dtuple_t* tuple;
dfield_t* field;
byte* buf;
ulint n_unique;
if (dict_index_is_ibuf(index)) {
/* In a universal index tree, we take the whole record as
the node pointer if the record is on the leaf level,
on non-leaf levels we remove the last field, which
contains the page number of the child page */
ut_a(!dict_table_is_comp(index->table));
n_unique = rec_get_n_fields_old(rec);
if (level > 0) {
ut_a(n_unique > 1);
n_unique--;
}
} else {
n_unique = dict_index_get_n_unique_in_tree_nonleaf(index);
}
ulint n_unique = dict_index_get_n_unique_in_tree_nonleaf(index);
tuple = dtuple_create(heap, n_unique + 1);

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2022, MariaDB Corporation.
Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -865,9 +865,7 @@ err_exit:
return READ_OK;
}
/** Check each tablespace found in the data dictionary.
Then look at each table defined in SYS_TABLES that has a space_id > 0
to find all the file-per-table tablespaces.
/** Open each tablespace found in the data dictionary.
In a crash recovery we already have some tablespace objects created from
processing the REDO log. We will compare the
@@ -876,14 +874,12 @@ tablespace file. In addition, more validation will be done if recovery
was needed and force_recovery is not set.
We also scan the biggest space id, and store it to fil_system. */
void dict_check_tablespaces_and_store_max_id()
void dict_load_tablespaces()
{
uint32_t max_space_id = 0;
btr_pcur_t pcur;
mtr_t mtr;
DBUG_ENTER("dict_check_tablespaces_and_store_max_id");
mtr.start();
dict_sys.lock(SRW_LOCK_CALL);
@@ -976,8 +972,6 @@ void dict_check_tablespaces_and_store_max_id()
fil_set_max_space_id_if_bigger(max_space_id);
dict_sys.unlock();
DBUG_VOID_RETURN;
}
/** Error message for a delete-marked record in dict_load_column_low() */
@@ -1125,7 +1119,7 @@ err_len:
prtype = dtype_form_prtype(
prtype,
data_mysql_default_charset_coll);
default_charset_info->number);
}
}

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2022, MariaDB Corporation.
Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -584,8 +584,6 @@ dict_stats_table_clone_create(
continue;
}
ut_ad(!dict_index_is_ibuf(index));
ulint n_uniq = dict_index_get_n_unique(index);
heap_size += sizeof(dict_index_t);
@@ -634,8 +632,6 @@ dict_stats_table_clone_create(
continue;
}
ut_ad(!dict_index_is_ibuf(index));
dict_index_t* idx;
idx = (dict_index_t*) mem_heap_zalloc(heap, sizeof(*idx));
@@ -714,7 +710,6 @@ dict_stats_empty_index(
/*!< in: whether to empty defrag stats */
{
ut_ad(!(index->type & DICT_FTS));
ut_ad(!dict_index_is_ibuf(index));
ut_ad(index->table->stats_mutex_is_owner());
ulint n_uniq = index->n_uniq;
@@ -767,8 +762,6 @@ dict_stats_empty_table(
continue;
}
ut_ad(!dict_index_is_ibuf(index));
dict_stats_empty_index(index, empty_defrag_stats);
}
@@ -901,8 +894,6 @@ dict_stats_copy(
}
}
ut_ad(!dict_index_is_ibuf(dst_idx));
if (!INDEX_EQ(src_idx, dst_idx)) {
for (src_idx = dict_table_get_first_index(src);
src_idx != NULL;
@@ -1094,11 +1085,10 @@ btr_cur_t::open_random_leaf(rec_offs *&offsets, mem_heap_t *&heap, mtr_t &mtr)
dberr_t err;
auto offset= index()->page;
bool merge= false;
ulint height= ULINT_UNDEFINED;
while (buf_block_t *block=
btr_block_get(*index(), offset, RW_S_LATCH, merge, &mtr, &err))
btr_block_get(*index(), offset, RW_S_LATCH, &mtr, &err))
{
page_cur.block= block;
@@ -1120,8 +1110,7 @@ btr_cur_t::open_random_leaf(rec_offs *&offsets, mem_heap_t *&heap, mtr_t &mtr)
return DB_SUCCESS;
}
if (!--height)
merge= !index()->is_clust();
height--;
page_cur_open_on_rnd_user_rec(&page_cur);
@@ -1462,10 +1451,6 @@ dummy_empty:
dict_stats_empty_index(index, false);
index->table->stats_mutex_unlock();
return err;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
} else if (ibuf_debug && !dict_index_is_clust(index)) {
goto dummy_empty;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
} else if (dict_index_is_online_ddl(index) || !index->is_committed()
|| !index->table->space) {
goto dummy_empty;
@@ -1571,9 +1556,6 @@ empty_table:
}
for (; index != NULL; index = dict_table_get_next_index(index)) {
ut_ad(!dict_index_is_ibuf(index));
if (!index->is_btree()) {
continue;
}
@@ -1638,9 +1620,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level,
for (ulint height = ULINT_UNDEFINED;; height--)
{
buf_block_t* block=
btr_block_get(*index, page, RW_S_LATCH,
!height && !index->is_clust(), mtr, &err);
buf_block_t* block= btr_block_get(*index, page, RW_S_LATCH, mtr, &err);
if (!block)
break;
@@ -2258,9 +2238,7 @@ dict_stats_analyze_index_below_cur(
block = buf_page_get_gen(page_id, zip_size,
RW_S_LATCH, NULL, BUF_GET,
&mtr, &err,
!index->is_clust()
&& 1 == btr_page_get_level(page));
&mtr, &err);
if (!block) {
goto func_exit;
}
@@ -2999,7 +2977,6 @@ dict_stats_update_persistent(
return(DB_CORRUPTION);
}
ut_ad(!dict_index_is_ibuf(index));
table->stats_mutex_lock();
dict_stats_empty_index(index, false);
table->stats_mutex_unlock();
@@ -3373,8 +3350,6 @@ unlocked_free_and_exit:
continue;
}
ut_ad(!dict_index_is_ibuf(index));
for (unsigned i = 0; i < index->n_uniq; i++) {
char stat_name[16];

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2021, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2022, MariaDB Corporation.
Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -45,7 +45,6 @@ Created 10/25/1995 Heikki Tuuri
#include "srv0start.h"
#include "trx0purge.h"
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log.h"
#ifdef __linux__
@@ -1414,7 +1413,7 @@ void fil_system_t::set_write_through(bool write_through)
{
mysql_mutex_lock(&mutex);
if (write_through != this->write_through)
if (write_through != is_write_through())
{
this->write_through= write_through;
fil_space_t::reopen_all();
@@ -1427,7 +1426,7 @@ void fil_system_t::set_buffered(bool buffered)
{
mysql_mutex_lock(&mutex);
if (buffered != this->buffered)
if (buffered != is_buffered())
{
this->buffered= buffered;
fil_space_t::reopen_all();
@@ -1718,7 +1717,6 @@ pfs_os_file_t fil_delete_tablespace(uint32_t id)
fil_space_free_low(space);
}
ibuf_delete_for_discarded_space(id);
return handle;
}
@@ -2859,10 +2857,6 @@ write_completed:
{
ut_ad(request.is_read());
/* IMPORTANT: since i/o handling for reads will read also the insert
buffer in fil_system.sys_space, we have to be very careful not to
introduce deadlocks. We never close fil_system.sys_space data
files and never issue asynchronous reads of change buffer pages. */
const page_id_t id(request.bpage->id());
if (dberr_t err= request.bpage->read_complete(*request.node))

View File

@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (C) 2013, 2021, MariaDB Corporation.
Copyright (C) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -47,7 +47,6 @@ Updated 14/02/2015
#include "trx0sys.h"
#include "row0mysql.h"
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Created 11/29/1995 Heikki Tuuri
#include "page0page.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "ibuf0ibuf.h"
#include "btr0btr.h"
#include "btr0sea.h"
#include "dict0boot.h"
@@ -848,11 +847,17 @@ fsp_fill_free_list(
{
buf_block_t *f= buf_LRU_get_free_block(false);
buf_block_t *block=
buf_page_create(space,
static_cast<uint32_t>(i + FSP_IBUF_BITMAP_OFFSET),
buf_page_create(space, static_cast<uint32_t>(i + 1),
zip_size, mtr, f);
if (UNIV_UNLIKELY(block != f))
buf_pool.free_block(f);
/* The zero-initialization will reset the change buffer bitmap bits
to safe values for possible import to an earlier version that
supports change buffering:
IBUF_BITMAP_FREE = 0 (no space left for buffering inserts)
IBUF_BITMAP_BUFFERED = 0 (no changes have been buffered)
IBUF_BITMAP_IBUF = 0 (not part of the change buffer) */
fsp_init_file_page(space, block, mtr);
mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame,
FIL_PAGE_IBUF_BITMAP);
@@ -877,9 +882,9 @@ fsp_fill_free_list(
if (UNIV_UNLIKELY(init_xdes))
{
/* The first page in the extent is a descriptor page and the
second is an ibuf bitmap page: mark them used */
second was reserved for change buffer bitmap: mark them used */
xdes_set_free<false>(*xdes, descr, 0, mtr);
xdes_set_free<false>(*xdes, descr, FSP_IBUF_BITMAP_OFFSET, mtr);
xdes_set_free<false>(*xdes, descr, 1, mtr);
xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr);
if (dberr_t err= flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG,
xdes, xoffset, mtr))

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2022, MariaDB Corporation.
Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,7 +34,6 @@ Created 2013/03/27 Allen Lai and Jimmy Yang
#include "btr0pcur.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "srv0mon.h"
#include "gis0geo.h"
@@ -538,7 +537,7 @@ err_exit:
mem_heap_free(heap);
}
MY_ATTRIBUTE((nonnull, warn_unused_result))
MY_ATTRIBUTE((nonnull(1,3,4,5,6,8), warn_unused_result))
/**************************************************************//**
Update parent page's MBR and Predicate lock information during a split */
static
@@ -552,6 +551,7 @@ rtr_adjust_upper_level(
buf_block_t* new_block, /*!< in/out: the new half page */
rtr_mbr_t* mbr, /*!< in: MBR on the old page */
rtr_mbr_t* new_mbr, /*!< in: MBR on the new page */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
{
ulint page_no;
@@ -570,7 +570,6 @@ rtr_adjust_upper_level(
/* Create a memory heap where the data tuple is stored */
heap = mem_heap_create(1024);
cursor.thr = sea_cur->thr;
cursor.page_cur.index = sea_cur->index();
cursor.page_cur.block = block;
@@ -584,7 +583,8 @@ rtr_adjust_upper_level(
/* Set new mbr for the old page on the upper level. */
/* Look up the index for the node pointer to page */
offsets = rtr_page_get_father_block(NULL, heap, mtr, sea_cur, &cursor);
offsets = rtr_page_get_father_block(nullptr, heap, sea_cur, &cursor,
thr, mtr);
page_cursor = btr_cur_get_page_cur(&cursor);
@@ -669,7 +669,7 @@ rtr_adjust_upper_level(
if (next_page_no == FIL_NULL) {
} else if (buf_block_t* next_block =
btr_block_get(*sea_cur->index(), next_page_no, RW_X_LATCH,
false, mtr, &err)) {
mtr, &err)) {
if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame
+ FIL_PAGE_PREV,
block->page.frame
@@ -691,11 +691,6 @@ rtr_adjust_upper_level(
/*************************************************************//**
Moves record list to another page for rtree splitting.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return error code
@retval DB_FAIL on ROW_FORMAT=COMPRESSED compression failure */
static
@@ -731,8 +726,7 @@ rtr_split_page_move_rec_list(
ulint max_to_move = 0;
rtr_rec_move_t* rec_move = NULL;
ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_spatial(index));
ut_ad(index->is_spatial());
rec_offs_init(offsets_);
@@ -867,7 +861,8 @@ rtr_page_split_and_insert(
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
dberr_t* err, /*!< out: error code */
que_thr_t* thr) /*!< in: query thread */
{
buf_block_t* block;
page_t* page;
@@ -1159,7 +1154,7 @@ after_insert:
/* Adjust the upper level. */
*err = rtr_adjust_upper_level(cursor, flags, block, new_block,
&mbr, &new_mbr, mtr);
&mbr, &new_mbr, thr, mtr);
if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
return nullptr;
}
@@ -1179,13 +1174,6 @@ after_insert:
/* If inserting the new record fails, we need to do another
split. */
if (!rec) {
/* We play safe and reset the free bits for new_page */
if (!dict_index_is_clust(cursor->index())
&& !cursor->index()->table->is_temporary()) {
ibuf_reset_free_bits(new_block);
ibuf_reset_free_bits(block);
}
/* We need to clean the parent path here and search for the
father node later; otherwise, we could find a wrong
parent. */
@@ -1214,6 +1202,244 @@ after_insert:
return(rec);
}
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts the tuple.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called.

The steps are: validate the root page, allocate a new page on the same
level, move all root records to the new page, re-create the root one
level higher with a single node pointer to the new page, position the
cursor on the new page and finally delegate to
rtr_page_split_and_insert().

@return inserted record
@retval nullptr on failure; note that not every failure path assigns *err
(the root page validation failures return without touching it) */
rec_t*
rtr_root_raise_and_insert(
/*======================*/
	ulint		flags,	/*!< in: undo logging and locking flags */
	btr_cur_t*	cursor,	/*!< in: cursor at which to insert: must be
				on the root page; when the function returns,
				the cursor is positioned on the predecessor
				of the inserted record */
	rec_offs**	offsets,/*!< out: offsets on inserted record */
	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext,	/*!< in: number of externally stored columns */
	mtr_t*		mtr,	/*!< in: mtr */
	dberr_t*	err,	/*!< out: error code; may be nullptr */
	que_thr_t*	thr)	/*!< in: query thread */
{
	dict_index_t*	index;
	rec_t*		rec;
	dtuple_t*	node_ptr;
	ulint		level;
	rec_t*		node_ptr_rec;
	page_cur_t*	page_cursor;
	page_zip_des_t*	root_page_zip;
	page_zip_des_t*	new_page_zip;
	buf_block_t*	root;
	buf_block_t*	new_block;
	dberr_t		e;

	/* The caller is allowed to pass err == nullptr. Redirect it to a
	local variable right away, so that every callee that receives err
	and every *err access below sees a valid pointer. */
	if (!err) {
		err = &e;
	}

	root = btr_cur_get_block(cursor);
	root_page_zip = buf_block_get_page_zip(root);
	ut_ad(!page_is_empty(root->page.frame));
	index = btr_cur_get_index(cursor);
	ut_ad(index->is_spatial());
#ifdef UNIV_ZIP_DEBUG
	ut_a(!root_page_zip
	     || page_zip_validate(root_page_zip, root->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
	const page_id_t root_id{root->page.id()};

	ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
					 | MTR_MEMO_SX_LOCK));
	ut_ad(mtr->memo_contains_flagged(root, MTR_MEMO_PAGE_X_FIX));

	/* The cursor must really be positioned on the root page of the
	index; anything else is treated as corruption. */
	if (index->page != root_id.page_no()) {
		ut_ad("corrupted root page number" == 0);
		return nullptr;
	}

	/* Both file segment headers stored on the root page must be
	valid before we allocate from them. */
	if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
				    *root, *index->table->space)
	    || !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
				       *root, *index->table->space)) {
		return nullptr;
	}

	/* Allocate a new page to the tree. Root splitting is done by first
	moving the root records to the new page, emptying the root, putting
	a node pointer to the new page, and then splitting the new page. */

	level = btr_page_get_level(root->page.frame);

	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr, err);

	if (!new_block) {
		return nullptr;
	}

	new_page_zip = buf_block_get_page_zip(new_block);
	/* The new page must be compressed if and only if the root is,
	and with the same compressed page size. */
	ut_a(!new_page_zip == !root_page_zip);
	ut_a(!new_page_zip
	     || page_zip_get_size(new_page_zip)
	     == page_zip_get_size(root_page_zip));

	btr_page_create(new_block, new_page_zip, index, level, mtr);
	if (page_has_siblings(new_block->page.frame)) {
		/* Reset FIL_PAGE_PREV and FIL_PAGE_NEXT (adjacent 4-byte
		fields) to FIL_NULL with a single 8-byte fill. */
		compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
		compile_time_assert(FIL_NULL == 0xffffffff);
		static_assert(FIL_PAGE_PREV % 8 == 0, "alignment");
		memset_aligned<8>(new_block->page.frame + FIL_PAGE_PREV,
				  0xff, 8);
		mtr->memset(new_block, FIL_PAGE_PREV, 8, 0xff);
		if (UNIV_LIKELY_NULL(new_page_zip)) {
			memset_aligned<8>(new_page_zip->data + FIL_PAGE_PREV,
					  0xff, 8);
		}
	}

	/* Copy the records from root to the new page one by one. */
	if (0
#ifdef UNIV_ZIP_COPY
	    || new_page_zip
#endif /* UNIV_ZIP_COPY */
	    || !page_copy_rec_list_end(new_block, root,
				       page_get_infimum_rec(root->page.frame),
				       index, mtr, err)) {
		switch (*err) {
		case DB_SUCCESS:
			break;
		case DB_FAIL:
			/* The record-by-record copy did not succeed;
			fall back to the byte-for-byte copy below. */
			*err = DB_SUCCESS;
			break;
		default:
			return nullptr;
		}

		ut_a(new_page_zip);

		/* Copy the page byte for byte. */
		page_zip_copy_recs(new_block, root_page_zip,
				   root->page.frame, index, mtr);

		/* Update the lock table and possible hash index. */
		if (index->has_locking()) {
			lock_move_rec_list_end(
				new_block, root,
				page_get_infimum_rec(root->page.frame));
		}

		/* Move any existing predicate locks */
		lock_prdt_rec_move(new_block, root_id);
	}

	constexpr uint16_t max_trx_id = PAGE_HEADER + PAGE_MAX_TRX_ID;
	if (!index->is_primary()) {
		/* In secondary indexes,
		PAGE_MAX_TRX_ID can be reset on the root page, because
		the field only matters on leaf pages, and the root no
		longer is a leaf page. (Older versions of InnoDB did
		set PAGE_MAX_TRX_ID on all secondary index pages.) */
		byte* p = my_assume_aligned<8>(
			PAGE_HEADER + PAGE_MAX_TRX_ID + root->page.frame);
		if (mach_read_from_8(p)) {
			mtr->memset(root, max_trx_id, 8, 0);
			if (UNIV_LIKELY_NULL(root->page.zip.data)) {
				memset_aligned<8>(max_trx_id
						  + root->page.zip.data, 0, 8);
			}
		}
	} else {
		/* PAGE_ROOT_AUTO_INC is only present in the clustered index
		root page; on other clustered index pages, we want to reserve
		the field PAGE_MAX_TRX_ID for future use. */
		byte* p = my_assume_aligned<8>(
			PAGE_HEADER + PAGE_MAX_TRX_ID + new_block->page.frame);
		if (mach_read_from_8(p)) {
			mtr->memset(new_block, max_trx_id, 8, 0);
			if (UNIV_LIKELY_NULL(new_block->page.zip.data)) {
				memset_aligned<8>(max_trx_id
						  + new_block->page.zip.data,
						  0, 8);
			}
		}
	}

	/* If this is a pessimistic insert which is actually done to
	perform a pessimistic update then we have stored the lock
	information of the record to be inserted on the infimum of the
	root page: we cannot discard the lock structs on the root page */

	if (index->has_locking()) {
		lock_update_root_raise(*new_block, root_id);
	}

	/* Create a memory heap where the node pointer is stored */
	if (!*heap) {
		*heap = mem_heap_create(1000);
	}

	const uint32_t new_page_no = new_block->page.id().page_no();
	rec = page_rec_get_next(page_get_infimum_rec(new_block->page.frame));
	ut_ad(rec); /* We just created the page. */

	/* Build the node pointer (= node key and page address) for the
	child */
	rtr_mbr_t new_mbr;

	rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
	node_ptr = rtr_index_build_node_ptr(index, &new_mbr, rec, new_page_no,
					    *heap);

	/* The node pointer must be marked as the predefined minimum record,
	as there is no lower alphabetical limit to records in the leftmost
	node of a level: */
	dtuple_set_info_bits(node_ptr,
			     dtuple_get_info_bits(node_ptr)
			     | REC_INFO_MIN_REC_FLAG);

	/* Rebuild the root page to get free space */
	btr_page_empty(root, root_page_zip, index, level + 1, mtr);
	ut_ad(!page_has_siblings(root->page.frame));

	page_cursor = btr_cur_get_page_cur(cursor);

	/* Insert node pointer to the root */

	page_cur_set_before_first(root, page_cursor);

	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
					     offsets, heap, 0, mtr);

	/* The root page should only contain the node pointer
	to new_block at this point. Thus, the data should fit. */
	ut_a(node_ptr_rec);

	/* From here on the cursor refers to the child page. */
	page_cursor->block = new_block;
	page_cursor->index = index;

	if (tuple) {
		ut_ad(dtuple_check_typed(tuple));
		/* Reposition the cursor to the child node */
		ulint low_match = 0, up_match = 0;

		if (page_cur_search_with_match(tuple, PAGE_CUR_LE,
					       &up_match, &low_match,
					       page_cursor, nullptr)) {
			/* err is never null here; see the top of the
			function. */
			*err = DB_CORRUPTION;
			return nullptr;
		}
	} else {
		page_cursor->rec = page_get_infimum_rec(new_block->page.frame);
	}

	/* Split the child and insert tuple */
	return rtr_page_split_and_insert(flags, cursor, offsets, heap,
					 tuple, n_ext, mtr, err, thr);
}
/****************************************************************//**
Following the right link to find the proper block for insert.
@return the proper block.*/
@@ -1242,6 +1468,7 @@ rtr_ins_enlarge_mbr(
/* Check path info is not empty. */
ut_ad(!btr_cur->rtr_info->parent_path->empty());
ut_ad(btr_cur->rtr_info->thr || !btr_cur->index()->is_committed());
/* Create a memory heap. */
heap = mem_heap_create(1024);
@@ -1267,7 +1494,8 @@ rtr_ins_enlarge_mbr(
cursor.page_cur.index = page_cursor->index;
cursor.page_cur.block = block;
offsets = rtr_page_get_father_block(
NULL, heap, mtr, btr_cur, &cursor);
nullptr, heap, btr_cur, &cursor,
btr_cur->rtr_info->thr, mtr);
page = buf_block_get_frame(block);

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,7 +34,6 @@ Created 2014/01/16 Jimmy Yang
#include "btr0pcur.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#include "trx0trx.h"
#include "srv0mon.h"
#include "que0que.h"
@@ -511,13 +510,13 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index,
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. Mainly called by row_search_index_entry() */
closed with btr_pcur_close. */
bool
rtr_pcur_open(
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
que_thr_t* thr, /*!< in/out; query thread */
mtr_t* mtr) /*!< in: mtr */
{
static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
@@ -534,15 +533,16 @@ rtr_pcur_open(
/* Search with the tree cursor */
btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cursor->page_cur.index = index;
dict_index_t* const index = cursor->index();
btr_cursor->rtr_info = rtr_create_rtr_info(false, false,
btr_cursor, index);
btr_cursor->rtr_info = rtr_create_rtr_info(false, false, thr,
btr_cursor);
/* Purge will SX lock the tree instead of take Page Locks */
if (btr_cursor->thr) {
if (!thr) {
/* Purge will U lock the tree instead of taking Page Locks */
} else {
btr_cursor->rtr_info->need_page_lock = true;
btr_cursor->rtr_info->thr = btr_cursor->thr;
btr_cursor->rtr_info->thr = thr;
}
if ((latch_mode & 8) && index->lock.have_u_not_x()) {
@@ -607,12 +607,14 @@ rtr_pcur_open(
about parent nodes in search
@param[out] cursor cursor on node pointer record,
its page x-latched
@param[in,out] thr query thread
@return whether the cursor was successfully positioned */
bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor)
bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor,
que_thr_t *thr)
{
mem_heap_t *heap = mem_heap_create(100);
rec_offs *offsets= rtr_page_get_father_block(nullptr, heap,
mtr, sea_cur, cursor);
sea_cur, cursor, thr, mtr);
mem_heap_free(heap);
return offsets != nullptr;
}
@@ -629,12 +631,13 @@ static const rec_t* rtr_get_father_node(
btr_cur_t* sea_cur,/*!< in: search cursor */
btr_cur_t* btr_cur,/*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
que_thr_t* thr, /*!< in/out: query thread */
ulint page_no,/*!< Current page no */
mtr_t* mtr) /*!< in: mtr */
{
const rec_t* rec = nullptr;
auto had_rtr = btr_cur->rtr_info;
dict_index_t* const index = btr_cur->index();
ut_d(dict_index_t* const index = btr_cur->index());
/* Try to optimally locate the parent node. Level should always be
less than sea_cur->tree_height unless the root is splitting */
@@ -666,7 +669,7 @@ static const rec_t* rtr_get_father_node(
rtr_clean_rtr_info(btr_cur->rtr_info, true);
}
btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index);
btr_cur->rtr_info = rtr_create_rtr_info(false, false, thr, btr_cur);
if (btr_cur_search_to_nth_level(level, tuple,
PAGE_CUR_RTREE_LOCATE,
@@ -718,6 +721,7 @@ rtr_page_get_father_node_ptr(
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
{
dtuple_t* tuple;
@@ -754,7 +758,7 @@ rtr_page_get_father_node_ptr(
const rec_t* node_ptr = rtr_get_father_node(level + 1, tuple,
sea_cur, cursor,
page_no, mtr);
thr, page_no, mtr);
if (!node_ptr) {
return nullptr;
}
@@ -780,18 +784,20 @@ rtr_page_get_father_block(
/*======================*/
rec_offs* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
mtr_t* mtr, /*!< in: mtr */
btr_cur_t* sea_cur,/*!< in: search cursor, contains information
about parent nodes in search */
btr_cur_t* cursor) /*!< out: cursor on node pointer record,
btr_cur_t* cursor, /*!< out: cursor on node pointer record,
its page x-latched */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in/out: mtr */
{
rec_t *rec=
page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame));
if (!rec)
return nullptr;
cursor->page_cur.rec= rec;
return rtr_page_get_father_node_ptr(offsets, heap, sea_cur, cursor, mtr);
return rtr_page_get_father_node_ptr(offsets, heap, sea_cur, cursor,
thr, mtr);
}
/*******************************************************************//**
@@ -804,12 +810,12 @@ rtr_create_rtr_info(
bool init_matches, /*!< in: Whether to initiate the
"matches" structure for collecting
matched leaf records */
btr_cur_t* cursor, /*!< in: tree search cursor */
dict_index_t* index) /*!< in: index struct */
que_thr_t* thr, /*!< in/out: query thread */
btr_cur_t* cursor) /*!< in: tree search cursor */
{
rtr_info_t* rtr_info;
index = index ? index : cursor->index();
dict_index_t* index = cursor->index();
ut_ad(index);
rtr_info = static_cast<rtr_info_t*>(ut_zalloc_nokey(sizeof(*rtr_info)));
@@ -817,6 +823,7 @@ rtr_create_rtr_info(
rtr_info->allocated = true;
rtr_info->cursor = cursor;
rtr_info->index = index;
rtr_info->thr = thr;
if (init_matches) {
rtr_info->heap = mem_heap_create(sizeof(*(rtr_info->matches)));

View File

@@ -87,7 +87,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "fts0plugin.h"
#include "fts0priv.h"
#include "fts0types.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "log0crypt.h"
#include "mtr0mtr.h"
@@ -399,25 +398,6 @@ static TYPELIB innodb_deadlock_report_typelib = {
NULL
};
/** Allowed values of innodb_change_buffering */
static const char* innodb_change_buffering_names[] = {
"none", /* IBUF_USE_NONE */
"inserts", /* IBUF_USE_INSERT */
"deletes", /* IBUF_USE_DELETE_MARK */
"changes", /* IBUF_USE_INSERT_DELETE_MARK */
"purges", /* IBUF_USE_DELETE */
"all", /* IBUF_USE_ALL */
NullS
};
/** Enumeration of innodb_change_buffering */
static TYPELIB innodb_change_buffering_typelib = {
array_elements(innodb_change_buffering_names) - 1,
"innodb_change_buffering_typelib",
innodb_change_buffering_names,
NULL
};
/** Allowed values of innodb_instant_alter_column_allowed */
const char* innodb_instant_alter_column_allowed_names[] = {
"never", /* compatible with MariaDB 5.5 to 10.2 */
@@ -531,9 +511,6 @@ mysql_pfs_key_t fts_cache_mutex_key;
mysql_pfs_key_t fts_cache_init_mutex_key;
mysql_pfs_key_t fts_delete_mutex_key;
mysql_pfs_key_t fts_doc_id_mutex_key;
mysql_pfs_key_t ibuf_bitmap_mutex_key;
mysql_pfs_key_t ibuf_mutex_key;
mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
mysql_pfs_key_t recalc_pool_mutex_key;
mysql_pfs_key_t purge_sys_pq_mutex_key;
mysql_pfs_key_t recv_sys_mutex_key;
@@ -565,8 +542,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
PSI_KEY(fts_cache_init_mutex),
PSI_KEY(fts_delete_mutex),
PSI_KEY(fts_doc_id_mutex),
PSI_KEY(ibuf_mutex),
PSI_KEY(ibuf_pessimistic_insert_mutex),
PSI_KEY(index_online_log),
PSI_KEY(page_zip_stat_per_index_mutex),
PSI_KEY(purge_sys_pq_mutex),
@@ -973,20 +948,6 @@ static SHOW_VAR innodb_status_variables[]= {
{"dblwr_writes", &export_vars.innodb_dblwr_writes, SHOW_SIZE_T},
{"deadlocks", &lock_sys.deadlocks, SHOW_SIZE_T},
{"history_list_length", &export_vars.innodb_history_list_length,SHOW_SIZE_T},
{"ibuf_discarded_delete_marks", &ibuf.n_discarded_ops[IBUF_OP_DELETE_MARK],
SHOW_SIZE_T},
{"ibuf_discarded_deletes", &ibuf.n_discarded_ops[IBUF_OP_DELETE],
SHOW_SIZE_T},
{"ibuf_discarded_inserts", &ibuf.n_discarded_ops[IBUF_OP_INSERT],
SHOW_SIZE_T},
{"ibuf_free_list", &ibuf.free_list_len, SHOW_SIZE_T},
{"ibuf_merged_delete_marks", &ibuf.n_merged_ops[IBUF_OP_DELETE_MARK],
SHOW_SIZE_T},
{"ibuf_merged_deletes", &ibuf.n_merged_ops[IBUF_OP_DELETE], SHOW_SIZE_T},
{"ibuf_merged_inserts", &ibuf.n_merged_ops[IBUF_OP_INSERT], SHOW_SIZE_T},
{"ibuf_merges", &ibuf.n_merges, SHOW_SIZE_T},
{"ibuf_segment_size", &ibuf.seg_size, SHOW_SIZE_T},
{"ibuf_size", &ibuf.size, SHOW_SIZE_T},
{"log_waits", &log_sys.waits, SHOW_SIZE_T},
{"log_write_requests", &log_sys.write_to_buf, SHOW_SIZE_T},
{"log_writes", &log_sys.write_to_log, SHOW_SIZE_T},
@@ -3927,8 +3888,6 @@ static int innodb_init_params()
DBUG_RETURN(HA_ERR_INITIALIZATION);
}
DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);
/* Check that interdependent parameters have sane values. */
if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
@@ -4005,11 +3964,6 @@ static int innodb_init_params()
fts_sort_pll_degree = num_pll_degree;
/* Store the default charset-collation number of this MySQL
installation */
data_mysql_default_charset_coll = (ulint) default_charset_info->number;
if (innodb_flush_method == 1 /* O_DSYNC */) {
log_sys.log_write_through = true;
fil_system.write_through = true;
@@ -4213,8 +4167,6 @@ static int innodb_init(void* p)
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
innobase_old_blocks_pct, true);
ibuf_max_size_update(srv_change_buffer_max_size);
mysql_mutex_init(pending_checkpoint_mutex_key,
&log_requests.mutex,
MY_MUTEX_INIT_FAST);
@@ -4345,7 +4297,7 @@ innobase_start_trx_and_assign_read_view(
Do this only if transaction is using REPEATABLE READ isolation
level. */
trx->isolation_level = innobase_map_isolation_level(
thd_get_trx_isolation(thd));
thd_get_trx_isolation(thd)) & 3;
if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
trx->read_view.open(trx);
@@ -6593,8 +6545,7 @@ uint8_t
get_innobase_type_from_mysql_type(unsigned *unsigned_flag, const Field *field)
{
/* The following asserts try to check that the MySQL type code fits in
8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
the type */
8 bits: this is used when DATA_NOT_NULL is ORed to the type */
static_assert(MYSQL_TYPE_STRING < 256, "compatibility");
static_assert(MYSQL_TYPE_VAR_STRING < 256, "compatibility");
@@ -15264,7 +15215,7 @@ ha_innobase::check(
}
/* Restore the original isolation level */
m_prebuilt->trx->isolation_level = old_isolation_level;
m_prebuilt->trx->isolation_level = old_isolation_level & 3;
#ifdef BTR_CUR_HASH_ADAPT
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/* We validate the whole adaptive hash index for all tables
@@ -16297,7 +16248,7 @@ ha_innobase::store_lock(
if (lock_type != TL_IGNORE
&& trx->n_mysql_tables_in_use == 0) {
trx->isolation_level = innobase_map_isolation_level(
(enum_tx_isolation) thd_tx_isolation(thd));
(enum_tx_isolation) thd_tx_isolation(thd)) & 3;
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
@@ -17438,20 +17389,6 @@ innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
innobase_old_blocks_pct = ratio;
}
/****************************************************************//**
Update srv_change_buffer_max_size (the change_buffer_max_size system
variable) using the "saved" value, and pass the new value on to
ibuf_max_size_update(). This function is registered as the update
callback of MYSQL_SYSVAR_UINT(change_buffer_max_size).
@param save	pointer to the new value, read as a const uint */
static
void
innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
const void* save)
{
srv_change_buffer_max_size = *static_cast<const uint*>(save);
/* LOCK_global_system_variables is released for the duration of
ibuf_max_size_update() and reacquired before returning.
NOTE(review): presumably the caller holds this mutex on entry - confirm. */
mysql_mutex_unlock(&LOCK_global_system_variables);
ibuf_max_size_update(srv_change_buffer_max_size);
mysql_mutex_lock(&LOCK_global_system_variables);
}
#ifdef UNIV_DEBUG
static uint srv_fil_make_page_dirty_debug = 0;
static uint srv_saved_page_number_debug;
@@ -19479,19 +19416,6 @@ static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
NULL, NULL, FALSE);
#endif /* HAVE_LIBNUMA */
static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"Buffer changes to secondary indexes.",
nullptr, nullptr, IBUF_USE_NONE, &innodb_change_buffering_typelib);
static MYSQL_SYSVAR_UINT(change_buffer_max_size,
srv_change_buffer_max_size,
PLUGIN_VAR_RQCMDARG,
"Maximum on-disk size of change buffer in terms of percentage"
" of the buffer pool.",
NULL, innodb_change_buffer_max_size_update,
CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0);
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
PLUGIN_VAR_RQCMDARG,
"Specifies how InnoDB index statistics collection code should"
@@ -19499,18 +19423,6 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
" NULLS_UNEQUAL and NULLS_IGNORED",
NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Dump the change buffer at startup.",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
NULL, NULL, 0, 0, 1, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
PLUGIN_VAR_RQCMDARG,
"A number between [0, 100] that tells how oftern buffer pool dump status "
@@ -19840,12 +19752,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffer_dump),
MYSQL_SYSVAR(change_buffering_debug),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(read_only),

View File

@@ -2154,8 +2154,7 @@ next_page:
}
next_page= false;
block= btr_block_get(*clust_index, next_page_no, BTR_SEARCH_LEAF, false,
&mtr);
block= btr_block_get(*clust_index, next_page_no, BTR_SEARCH_LEAF, &mtr);
if (!block)
goto non_empty;
page_cur_set_before_first(block, cur);

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2022, MariaDB Corporation.
Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,7 +39,6 @@ Created July 18, 2007 Vasil Dimov
#include "dict0load.h"
#include "buf0buddy.h"
#include "buf0buf.h"
#include "ibuf0ibuf.h"
#include "dict0mem.h"
#include "dict0types.h"
#include "srv0start.h"
@@ -80,10 +79,7 @@ in i_s_page_type[] array */
/** R-tree index page */
#define I_S_PAGE_TYPE_RTREE (FIL_PAGE_TYPE_LAST + 1)
/** Change buffer B-tree page */
#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 2)
#define I_S_PAGE_TYPE_LAST I_S_PAGE_TYPE_IBUF
#define I_S_PAGE_TYPE_LAST I_S_PAGE_TYPE_RTREE
#define I_S_PAGE_TYPE_BITS 4
@@ -104,9 +100,6 @@ static buf_page_desc_t i_s_page_type[] = {
{"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
{"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN},
{"RTREE_INDEX", I_S_PAGE_TYPE_RTREE},
{"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
{"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
{"PAGE COMPRESSED AND ENCRYPTED", FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED},
};
/** This structure defines information we will fetch from pages
@@ -3776,17 +3769,17 @@ i_s_innodb_buffer_page_fill(
OK(fields[IDX_BUFFER_PAGE_STATE]->store(
std::min<uint32_t>(3, page_info->state) + 1, true));
static_assert(buf_page_t::UNFIXED == 1U << 29, "comp.");
static_assert(buf_page_t::UNFIXED == 2U << 29, "comp.");
static_assert(buf_page_t::READ_FIX == 4U << 29, "comp.");
static_assert(buf_page_t::WRITE_FIX == 5U << 29, "comp.");
static_assert(buf_page_t::WRITE_FIX == 6U << 29, "comp.");
unsigned io_fix = page_info->state >> 29;
if (io_fix < 4) {
io_fix = 1;
} else if (io_fix > 5) {
io_fix = 3;
} else if (io_fix == 4) {
io_fix = 2;
} else {
io_fix -= 2;
io_fix = 3;
}
OK(fields[IDX_BUFFER_PAGE_IO_FIX]->store(io_fix, true));
@@ -3824,14 +3817,9 @@ i_s_innodb_set_page_type(
their values are defined as 17855 and 17854, so we cannot
use them to index into i_s_page_type[] array, its array index
in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX
(1) for index pages or I_S_PAGE_TYPE_IBUF for
change buffer index pages */
(1) for index pages */
if (page_type == FIL_PAGE_RTREE) {
page_info->page_type = I_S_PAGE_TYPE_RTREE;
} else if (page_info->index_id
== static_cast<index_id_t>(DICT_IBUF_ID_MIN
+ IBUF_SPACE_ID)) {
page_info->page_type = I_S_PAGE_TYPE_IBUF;
} else {
ut_ad(page_type == FIL_PAGE_INDEX
|| page_type == FIL_PAGE_TYPE_INSTANT);
@@ -3876,9 +3864,9 @@ i_s_innodb_buffer_page_get_info(
static_assert(buf_page_t::NOT_USED == 0, "compatibility");
static_assert(buf_page_t::MEMORY == 1, "compatibility");
static_assert(buf_page_t::REMOVE_HASH == 2, "compatibility");
static_assert(buf_page_t::UNFIXED == 1U << 29, "compatibility");
static_assert(buf_page_t::UNFIXED == 2U << 29, "compatibility");
static_assert(buf_page_t::READ_FIX == 4U << 29, "compatibility");
static_assert(buf_page_t::WRITE_FIX == 5U << 29, "compatibility");
static_assert(buf_page_t::WRITE_FIX == 6U << 29, "compatibility");
page_info->state = bpage->state();
@@ -4268,17 +4256,17 @@ i_s_innodb_buf_page_lru_fill(
OK(fields[IDX_BUF_LRU_PAGE_STATE]->store(
page_info->compressed_only, true));
static_assert(buf_page_t::UNFIXED == 1U << 29, "comp.");
static_assert(buf_page_t::UNFIXED == 2U << 29, "comp.");
static_assert(buf_page_t::READ_FIX == 4U << 29, "comp.");
static_assert(buf_page_t::WRITE_FIX == 5U << 29, "comp.");
static_assert(buf_page_t::WRITE_FIX == 6U << 29, "comp.");
unsigned io_fix = page_info->state >> 29;
if (io_fix < 4) {
io_fix = 1;
} else if (io_fix > 5) {
io_fix = 3;
} else if (io_fix == 4) {
io_fix = 2;
} else {
io_fix -= 2;
io_fix = 3;
}
OK(fields[IDX_BUF_LRU_PAGE_IO_FIX]->store(io_fix, true));

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2022, MariaDB Corporation.
Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -56,12 +56,8 @@ is acceptable for the program to die with a clear assert failure. */
#define BTR_MAX_LEVELS 100
#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
btr_latch_mode((latch_mode) & ~(BTR_INSERT \
| BTR_DELETE_MARK \
| BTR_RTREE_UNDO_INS \
btr_latch_mode((latch_mode) & ~(BTR_RTREE_UNDO_INS \
| BTR_RTREE_DELETE_MARK \
| BTR_DELETE \
| BTR_IGNORE_SEC_UNIQUE \
| BTR_ALREADY_S_LATCHED \
| BTR_LATCH_FOR_INSERT \
| BTR_LATCH_FOR_DELETE))
@@ -79,6 +75,14 @@ btr_root_adjust_on_import(
const dict_index_t* index) /*!< in: index tree */
MY_ATTRIBUTE((warn_unused_result));
/** Check a file segment header within a B-tree root page.
@param offset file segment header offset
@param block B-tree root page
@param space tablespace
@return whether the segment header is valid */
bool btr_root_fseg_validate(ulint offset, const buf_block_t &block,
const fil_space_t &space);
/** Report a decryption failure. */
ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index);
@@ -86,12 +90,11 @@ ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index);
@param[in] index index tree
@param[in] page page number
@param[in] mode latch mode
@param[in] merge whether change buffer merge should be attempted
@param[in,out] mtr mini-transaction
@param[out] err error code
@return block */
buf_block_t *btr_block_get(const dict_index_t &index,
uint32_t page, ulint mode, bool merge,
uint32_t page, ulint mode,
mtr_t *mtr, dberr_t *err= nullptr);
/**************************************************************//**
@@ -242,15 +245,7 @@ btr_root_raise_and_insert(
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
MY_ATTRIBUTE((warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index. This has to
be done either within the same mini-transaction, or by invoking
ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
IBUF_BITMAP_FREE is unaffected by reorganization.
/** Reorganize an index page.
@param cursor page cursor
@param mtr mini-transaction
@return error code
@@ -348,6 +343,7 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */
@@ -451,15 +447,8 @@ btr_root_block_get(
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err); /*!< out: error code */
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index. This has to
be done either within the same mini-transaction, or by invoking
ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
IBUF_BITMAP_FREE is unaffected by reorganization.
/** Reorganize an index page.
@return error code
@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */
dberr_t btr_page_reorganize_block(
@@ -530,9 +519,10 @@ btr_lift_page_up(
must not be empty: use
btr_discard_only_page_on_level if the last
record from the page should be removed */
que_thr_t* thr, /*!< in/out: query thread for SPATIAL INDEX */
mtr_t* mtr, /*!< in/out: mini-transaction */
dberr_t* err) /*!< out: error code */
__attribute__((nonnull));
__attribute__((nonnull(1,2,4,5)));
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -56,11 +56,7 @@ enum {
BTR_KEEP_POS_FLAG = 8,
/** the caller is creating the index or wants to bypass the
index->info.online creation log */
BTR_CREATE_FLAG = 16,
/** the caller of btr_cur_optimistic_update() or
btr_cur_update_in_place() will take care of
updating IBUF_BITMAP_FREE */
BTR_KEEP_IBUF_BITMAP = 32
BTR_CREATE_FLAG = 16
};
/* btr_cur_latch_leaves() returns latched blocks and savepoints. */
@@ -156,8 +152,7 @@ If mode is PAGE_CUR_GE, then up_match will have a sensible value.
PAGE_CUR_GE, as the latter may end up on the previous page of
the record! Inserts should always be made using PAGE_CUR_LE
to search the position!
@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT,
BTR_DELETE_MARK, or BTR_DELETE;
@param latch_mode BTR_SEARCH_LEAF, ...
cursor->left_block is used to store a pointer to the left
neighbor page
@param cursor tree cursor; the cursor page is s- or x-latched, but see also
@@ -242,14 +237,8 @@ btr_cur_pessimistic_insert(
See if there is enough place in the page modification log to log
an update-in-place.
@retval false if out of space; IBUF_BITMAP_FREE will be reset
outside mtr if the page was recompressed
@retval true if enough place;
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
a secondary index leaf page. This has to be done either within the
same mini-transaction, or by invoking ibuf_reset_free_bits() before
mtr_commit(mtr). */
@retval false if out of space
@retval true if enough place */
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
@@ -291,7 +280,7 @@ Updates a record when the update causes no size changes in its fields.
@return locking or undo log related error code, or
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
on a ROW_FORMAT=COMPRESSED page */
dberr_t
btr_cur_update_in_place(
/*====================*/
@@ -712,34 +701,19 @@ enum btr_cur_method {
reference is stored in the field
hash_node, and might be necessary to
update */
BTR_CUR_BINARY, /*!< success using the binary search */
BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
the insert buffer */
BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
mark in the insert/delete buffer */
BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
the insert/delete buffer */
BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
BTR_CUR_BINARY /*!< success using the binary search */
};
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
struct btr_cur_t {
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
buf_block_t* left_block; /*!< this field is used to store
a pointer to the left neighbor
page, in the cases
BTR_SEARCH_PREV and
BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /*!< this field is only used
when btr_cur_search_to_nth_level
is called for an index entry
insertion: the calling query
thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/** The following fields are used in
btr_cur_search_to_nth_level to pass information: */
/* @{ */

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2022, MariaDB Corporation.
Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -76,24 +76,8 @@ enum btr_latch_mode {
/** Continue modifying the entire B-tree. */
BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE,
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually
exclusive. */
/** The search tuple will be inserted to the secondary index
at the searched position. When the leaf page is not in the
buffer pool, try to use the change buffer. */
BTR_INSERT = 64,
/** Try to delete mark a secondary index leaf page record at
the searched position using the change buffer when the page is
not in the buffer pool. */
BTR_DELETE_MARK = 128,
/** Try to purge the record using the change buffer when the
secondary index leaf page is not in the buffer pool. */
BTR_DELETE = BTR_INSERT | BTR_DELETE_MARK,
/** The caller is already holding dict_index_t::lock S-latch. */
BTR_ALREADY_S_LATCHED = 256,
BTR_ALREADY_S_LATCHED = 16,
/** Search and S-latch a leaf page, assuming that the
dict_index_t::lock S-latch is being held. */
BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF
@@ -107,28 +91,15 @@ enum btr_latch_mode {
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED,
/** Attempt to delete-mark a secondary index record. */
BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK,
/** Attempt to delete-mark a secondary index record
while holding the dict_index_t::lock S-latch. */
BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED = BTR_DELETE_MARK_LEAF
| BTR_ALREADY_S_LATCHED,
/** Attempt to purge a secondary index record. */
BTR_PURGE_LEAF = BTR_MODIFY_LEAF | BTR_DELETE,
/** Attempt to purge a secondary index record
while holding the dict_index_t::lock S-latch. */
BTR_PURGE_LEAF_ALREADY_S_LATCHED = BTR_PURGE_LEAF
| BTR_ALREADY_S_LATCHED,
/** In the case of BTR_MODIFY_TREE, the caller specifies
the intention to delete record only. It is used to optimize
block->lock range.*/
BTR_LATCH_FOR_DELETE = 32,
/** In the case of BTR_MODIFY_TREE, the caller specifies
the intention to delete record only. It is used to optimize
block->lock range.*/
BTR_LATCH_FOR_DELETE = 512,
/** In the case of BTR_MODIFY_TREE, the caller specifies
the intention to insert a record only. It is used to optimize
block->lock range.*/
BTR_LATCH_FOR_INSERT = 1024,
BTR_LATCH_FOR_INSERT = 64,
/** Attempt to delete a record in the tree. */
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
@@ -136,12 +107,8 @@ enum btr_latch_mode {
/** Attempt to insert a record into the tree. */
BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT,
/** This flag ORed to BTR_INSERT says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */
BTR_IGNORE_SEC_UNIQUE = 2048,
/** Rollback in spatial index */
BTR_RTREE_UNDO_INS = 4096,
BTR_RTREE_UNDO_INS = 128,
/** Try to delete mark a spatial index record */
BTR_RTREE_DELETE_MARK = 8192
BTR_RTREE_DELETE_MARK = 256
};

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -48,10 +48,6 @@ Created 11/5/1995 Heikki Tuuri
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
the block young in the LRU list */
#define BUF_GET_IF_IN_POOL_OR_WATCH 15
/*!< Get the page only if it's in the
buffer pool, if not then set a watch
on the page. */
#define BUF_GET_POSSIBLY_FREED 16
/*!< Like BUF_GET, but do not mind
if the file page has been freed. */
@@ -194,11 +190,9 @@ buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction
@param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge while
reading the pages from file.
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
@@ -208,9 +202,8 @@ buf_page_get_gen(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err = NULL,
bool allow_ibuf_merge = false)
MY_ATTRIBUTE((nonnull(6), warn_unused_result));
dberr_t* err = NULL)
MY_ATTRIBUTE((nonnull(6)));
/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@@ -218,14 +211,10 @@ buf_page_get_gen(
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL if a
block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge to happen
while reading the page from file
then it makes sure that it does merging of change buffer changes while
reading the page from file.
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_low(
@@ -235,8 +224,7 @@ buf_page_get_low(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err,
bool allow_ibuf_merge);
dberr_t* err);
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
@@ -539,18 +527,16 @@ public:
static constexpr uint32_t REMOVE_HASH= 2;
/** smallest state() of a buffer page that is freed in the tablespace */
static constexpr uint32_t FREED= 3;
/* unused state: 1U<<29 */
/** smallest state() for a block that belongs to buf_pool.LRU */
static constexpr uint32_t UNFIXED= 1U << 29;
/** smallest state() of a block for which buffered changes may exist */
static constexpr uint32_t IBUF_EXIST= 2U << 29;
static constexpr uint32_t UNFIXED= 2U << 29;
/** smallest state() of a (re)initialized page (no doublewrite needed) */
static constexpr uint32_t REINIT= 3U << 29;
/** smallest state() for an io-fixed block */
static constexpr uint32_t READ_FIX= 4U << 29;
/* unused state: 5U<<29 */
/** smallest state() for a write-fixed block */
static constexpr uint32_t WRITE_FIX= 5U << 29;
/** smallest state() for a write-fixed block with buffered changes */
static constexpr uint32_t WRITE_FIX_IBUF= 6U << 29;
static constexpr uint32_t WRITE_FIX= 6U << 29;
/** smallest state() for a write-fixed block (no doublewrite was used) */
static constexpr uint32_t WRITE_FIX_REINIT= 7U << 29;
/** buf_pool.LRU status mask in state() */
@@ -562,8 +548,7 @@ public:
byte *frame;
/* @} */
/** ROW_FORMAT=COMPRESSED page; zip.data (but not the data it points to)
is also protected by buf_pool.mutex;
!frame && !zip.data means an active buf_pool.watch */
is also protected by buf_pool.mutex */
page_zip_des_t zip;
#ifdef UNIV_DEBUG
/** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */
@@ -696,13 +681,6 @@ public:
bool is_freed() const
{ const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
bool is_ibuf_exist() const
{
const auto s= state();
ut_ad(s >= UNFIXED);
ut_ad(s < READ_FIX);
return (s & LRU_MASK) == IBUF_EXIST;
}
bool is_reinit() const { return !(~state() & REINIT); }
void set_reinit(uint32_t prev_state)
@@ -713,29 +691,10 @@ public:
ut_ad(s < prev_state + UNFIXED);
}
void set_ibuf_exist()
{
ut_ad(lock.is_write_locked());
ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
const auto s= state();
ut_ad(s >= UNFIXED);
ut_ad(s < READ_FIX);
ut_ad(s < IBUF_EXIST || s >= REINIT);
zip.fix.fetch_add(IBUF_EXIST - (LRU_MASK & s));
}
void clear_ibuf_exist()
{
ut_ad(lock.is_write_locked());
ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
ut_d(const auto s=) zip.fix.fetch_sub(IBUF_EXIST - UNFIXED);
ut_ad(s >= IBUF_EXIST);
ut_ad(s < REINIT);
}
void read_unfix(uint32_t s)
{
ut_ad(lock.is_write_locked());
ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
ut_ad(s == UNFIXED + 1 || s == REINIT + 1);
ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX);
ut_ad(old_state >= READ_FIX);
ut_ad(old_state < WRITE_FIX);
@@ -822,7 +781,7 @@ public:
uint32_t fix(uint32_t count= 1)
{
ut_ad(count);
ut_ad(count < IBUF_EXIST);
ut_ad(count < REINIT);
uint32_t f= zip.fix.fetch_add(count);
ut_ad(f >= FREED);
ut_ad(!((f ^ (f + 1)) & LRU_MASK));
@@ -1426,82 +1385,6 @@ public:
}
public:
/** @return whether the buffer pool contains a page
@tparam allow_watch whether to allow watch_is_sentinel()
@param page_id page identifier
@param chain hash table chain for page_id.fold() */
template<bool allow_watch= false>
TRANSACTIONAL_INLINE
bool page_hash_contains(const page_id_t page_id, hash_chain &chain)
{
transactional_shared_lock_guard<page_hash_latch> g
{page_hash.lock_get(chain)};
buf_page_t *bpage= page_hash.get(page_id, chain);
if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)])
{
ut_ad(!bpage->in_zip_hash);
ut_ad(!bpage->zip.data);
if (!allow_watch)
bpage= nullptr;
}
return bpage;
}
/** Determine if a block is a sentinel for a buffer pool watch.
@param bpage page descriptor
@return whether bpage a sentinel for a buffer pool watch */
bool watch_is_sentinel(const buf_page_t &bpage)
{
#ifdef SAFE_MUTEX
DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
page_hash.lock_get(page_hash.cell_get(bpage.id().fold())).
is_locked());
#endif /* SAFE_MUTEX */
ut_ad(bpage.in_file());
if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)])
return false;
ut_ad(!bpage.in_zip_hash);
ut_ad(!bpage.zip.data);
return true;
}
/** Check if a watched page has been read.
This may only be called after !watch_set() and before invoking watch_unset().
@param id page identifier
@return whether the page was read to the buffer pool */
TRANSACTIONAL_INLINE
bool watch_occurred(const page_id_t id)
{
hash_chain &chain= page_hash.cell_get(id.fold());
transactional_shared_lock_guard<page_hash_latch> g
{page_hash.lock_get(chain)};
/* The page must exist because watch_set() increments buf_fix_count. */
return !watch_is_sentinel(*page_hash.get(id, chain));
}
/** Register a watch for a page identifier. The caller must hold an
exclusive page hash latch. The *hash_lock may be released,
relocated, and reacquired.
@param id page identifier
@param chain hash table chain with exclusively held page_hash
@return a buffer pool block corresponding to id
@retval nullptr if the block was not present, and a watch was installed */
inline buf_page_t *watch_set(const page_id_t id, hash_chain &chain);
/** Stop watching whether a page has been read in.
watch_set(id) must have returned nullptr before.
@param id page identifier
@param chain unlocked hash table chain */
void watch_unset(const page_id_t id, hash_chain &chain);
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
@param w sentinel
@param chain locked hash table chain
@return w->state() */
inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
TPOOL_SUPPRESS_TSAN
bool running_out() const
@@ -1851,9 +1734,6 @@ public:
# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
#endif
/** Sentinels to detect if pages are read into the buffer pool while
a delete-buffering operation is pending. Protected by mutex. */
buf_page_t watch[innodb_purge_threads_MAX + 1];
/** Reserve a buffer. */
buf_tmp_buffer_t *io_buf_reserve() { return io_buf.reserve(); }

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2021, MariaDB Corporation.
Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,8 +24,7 @@ The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0rea_h
#define buf0rea_h
#pragma once
#include "buf0buf.h"
@@ -56,21 +55,14 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
mechanism is not activated. NOTE: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
end up waiting for these latches!
@param[in] page_id page id of a page which the current thread
wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
ulint
buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf);
@return number of page read requests issued */
ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size);
/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
@@ -91,26 +83,12 @@ only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@param[in] page_id page id; see NOTE 3 above
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] ibuf whether we are inside ibuf routine
@return number of page read requests issued */
ulint
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf);
ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size);
/** Issue read requests for pages that need to be recovered.
@param space_id tablespace identifier
@param page_nos page numbers to read, in ascending order */
void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos);
/** @name Modes used in read-ahead @{ */
/** read only pages belonging to the insert buffer tree */
#define BUF_READ_IBUF_PAGES_ONLY 131
/** read any page */
#define BUF_READ_ANY_PAGE 132
/* @} */
#endif

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Created 1/16/1996 Heikki Tuuri
/** @return whether a length is actually stored in a field */
#define len_is_stored(len) (len != UNIV_SQL_NULL && len != UNIV_SQL_DEFAULT)
extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
@@ -196,14 +195,6 @@ constexpr uint8_t DATA_MBR_LEN= uint8_t(SPDIMS * 2 * sizeof(double));
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
alphabetical order for a single field and decide the storage size of an
SQL null*/
#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
/* Maximum multi-byte character length in bytes, plus 1 */
#define DATA_MBMAX 8
@@ -344,13 +335,11 @@ charset-collation code.
DATA_BINARY_TYPE etc.
@param[in] charset_coll character-set collation code
@return precise type, including the charset-collation code */
UNIV_INLINE
uint32_t
dtype_form_prtype(ulint old_prtype, ulint charset_coll)
inline uint32_t dtype_form_prtype(ulint old_prtype, ulint charset_coll)
{
ut_ad(old_prtype < 256 * 256);
ut_ad(charset_coll <= MAX_CHAR_COLL_NUM);
return(uint32_t(old_prtype + (charset_coll << 16)));
ut_ad(old_prtype <= 0xffff);
ut_ad(charset_coll <= MAX_CHAR_COLL_NUM);
return uint32_t(old_prtype | (charset_coll << 16));
}
/*********************************************************************//**
@@ -439,40 +428,6 @@ dtype_get_sql_null_size(
const dtype_t* type, /*!< in: type */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /*!< in: type struct */
const byte* buf); /*!< in: buffer for the stored order info */
/**********************************************************************//**
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
byte* buf, /*!< in: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
const dtype_t* type, /*!< in: type struct */
ulint prefix_len);/*!< in: prefix length to
replace type->len, or 0 */
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
dtype_t* type, /*!< in: type struct */
const byte* buf); /*!< in: buffer for stored type order info */
/*********************************************************************//**
Validates a data type structure.
@return TRUE if ok */
@@ -494,8 +449,6 @@ struct dict_col_t;
If you add fields to this structure, be sure to initialize them everywhere.
This structure is initialized in the following functions:
dtype_set()
dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
struct dtype_t{

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -183,126 +183,6 @@ dtype_get_mbmaxlen(
return type->mbmaxlen;
}
/**********************************************************************//**
Serializes the parts of a data type that determine its alphabetical
ordering and the storage size of an SQL NULL value, using the >= 4.1.x
storage format (DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE bytes):
buf[0]    low 8 bits of mtype, with bit 128 set for DATA_BINARY_TYPE
buf[1]    low 8 bits of prtype
buf[2..3] prefix_len if nonzero, else type->len (low 16 bits)
buf[4..5] charset-collation code, with bit 128 of buf[4] set for
          DATA_NOT_NULL */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
	byte*		buf,	/*!< in: buffer for
				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
				bytes where we store the info */
	const dtype_t*	type,	/*!< in: type struct */
	ulint		prefix_len)/*!< in: prefix length to
				replace type->len, or 0 */
{
	compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);

	ut_ad(type);
	ut_ad(type->mtype >= DATA_VARCHAR);
	ut_ad(type->mtype <= DATA_MTYPE_MAX);

	/* Compose the first byte locally before storing it. Versions
	< 4.1.2 additionally set bit 64 here for DATA_NONLATIN1; that
	flag is no longer written. */
	byte	main_type = (byte)(type->mtype & 0xFFUL);

	if (type->prtype & DATA_BINARY_TYPE) {
		main_type |= 128;
	}

	buf[0] = main_type;
	buf[1] = (byte)(type->prtype & 0xFFUL);

	const ulint	stored_len = prefix_len ? prefix_len : type->len;

	mach_write_to_2(buf + 2, stored_len & 0xFFFFUL);

	const ulint	coll = dtype_get_charset_coll(type->prtype);

	ut_ad(coll <= MAX_CHAR_COLL_NUM);
	mach_write_to_2(buf + 4, coll);

	/* The NOT NULL flag shares the first collation byte. */
	if (type->prtype & DATA_NOT_NULL) {
		buf[4] |= 128;
	}
}
/**********************************************************************//**
Deserializes the ordering and SQL NULL size information of a data type
from the < 4.1.x storage format (DATA_ORDER_NULL_TYPE_BUF_SIZE bytes).
This format carries no charset-collation code, so
data_mysql_default_charset_coll is always applied. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
	dtype_t*	type,	/*!< in: type struct */
	const byte*	buf)	/*!< in: buffer for stored type order info */
{
	compile_time_assert(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);

	/* Build the precise type in a local: the low byte comes from
	buf[1], and bit 128 of buf[0] carries the binary flag. */
	uint32_t	precise = buf[1];

	if (buf[0] & 128) {
		precise |= DATA_BINARY_TYPE;
	}

	type->mtype = buf[0] & 63;
	type->len = mach_read_from_2(buf + 2);
	type->prtype = dtype_form_prtype(precise,
					 data_mysql_default_charset_coll);

	dtype_set_mblen(type);
}
/**********************************************************************//**
Deserializes the ordering and SQL NULL size information of a data type
from the >= 4.1.x storage format (DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes). For string types, a stored charset-collation code of 0 is
replaced with data_mysql_default_charset_coll. */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
	dtype_t*	type,	/*!< in: type struct */
	const byte*	buf)	/*!< in: buffer for stored type order info */
{
	compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);

	/* Collect the precise type flags in a local first. */
	uint32_t	precise = buf[1];

	if (buf[0] & 128) {
		precise |= DATA_BINARY_TYPE;
	}

	if (buf[4] & 128) {
		precise |= DATA_NOT_NULL;
	}

	type->mtype = buf[0] & 63;
	type->len = mach_read_from_2(buf + 2);

	if (dtype_is_string_type(type->mtype)) {
		ulint	coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;

		ut_a(coll <= MAX_CHAR_COLL_NUM);

		if (!coll) {
			/* This insert buffer record was inserted with
			MySQL version < 4.1.2, and the charset-collation
			code was not explicitly stored to dtype->prtype
			at that time. It must be the default
			charset-collation of this MySQL installation. */
			coll = data_mysql_default_charset_coll;
		}

		precise = dtype_form_prtype(precise, coll);
	}

	type->prtype = precise;

	dtype_set_mblen(type);
}
/***********************************************************************//**
Returns the size of a fixed size data type, 0 if not a fixed size type.
@return fixed size, or 0 */

View File

@@ -2,7 +2,7 @@
Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -634,8 +634,6 @@ dict_table_get_next_index(
#define dict_index_is_auto_gen_clust(index) (index)->is_gen_clust()
#define dict_index_is_unique(index) (index)->is_unique()
#define dict_index_is_spatial(index) (index)->is_spatial()
#define dict_index_is_ibuf(index) (index)->is_ibuf()
#define dict_index_is_sec_or_ibuf(index) !(index)->is_primary()
#define dict_index_has_virtual(index) (index)->has_virtual()
/** Get all the FTS indexes on a table.

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,9 +39,7 @@ Created 4/24/1996 Heikki Tuuri
/** A stack of table names related through foreign key constraints */
typedef std::deque<const char*, ut_allocator<const char*> > dict_names_t;
/** Check each tablespace found in the data dictionary.
Then look at each table defined in SYS_TABLES that has a space_id > 0
to find all the file-per-table tablespaces.
/** Open each tablespace found in the data dictionary.
In a crash recovery we already have some tablespace objects created from
processing the REDO log. We will compare the
@@ -50,7 +48,7 @@ tablespace file. In addition, more validation will be done if recovery
was needed and force_recovery is not set.
We also scan the biggest space id, and store it to fil_system. */
void dict_check_tablespaces_and_store_max_id();
void dict_load_tablespaces();
/** Make sure the data_file_name is saved in dict_table_t if needed.
@param[in,out] table Table object */

View File

@@ -64,7 +64,6 @@ combination of types */
auto-generated clustered indexes,
also DICT_UNIQUE will be set */
#define DICT_UNIQUE 2 /*!< unique index */
#define DICT_IBUF 8 /*!< insert buffer tree */
#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
in SYS_INDEXES.TYPE */
#define DICT_FTS 32 /* FTS index; can't be combined with the
@@ -995,7 +994,7 @@ struct dict_index_t {
# define DICT_INDEX_MERGE_THRESHOLD_DEFAULT 50
unsigned type:DICT_IT_BITS;
/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
DICT_IBUF, DICT_CORRUPT) */
DICT_CORRUPT) */
#define MAX_KEY_LENGTH_BITS 12
unsigned trx_id_offset:MAX_KEY_LENGTH_BITS;
/*!< position of the trx id column
@@ -1184,12 +1183,8 @@ public:
/** @return whether instant ALTER TABLE is in effect */
inline bool is_instant() const;
/** @return whether the index is the primary key index
(not the clustered index of the change buffer) */
bool is_primary() const
{
return DICT_CLUSTERED == (type & (DICT_CLUSTERED | DICT_IBUF));
}
/** @return whether the index is the primary key index */
bool is_primary() const { return is_clust(); }
/** @return whether this is a generated clustered index */
bool is_gen_clust() const { return type == DICT_CLUSTERED; }
@@ -1203,16 +1198,13 @@ public:
/** @return whether this is a spatial index */
bool is_spatial() const { return UNIV_UNLIKELY(type & DICT_SPATIAL); }
/** @return whether this is the change buffer */
bool is_ibuf() const { return UNIV_UNLIKELY(type & DICT_IBUF); }
/** @return whether this index requires locking */
bool has_locking() const { return !is_ibuf(); }
static constexpr bool has_locking() { return true; }
/** @return whether this is a normal B-tree index
(not the change buffer, not SPATIAL or FULLTEXT) */
bool is_btree() const {
return UNIV_LIKELY(!(type & (DICT_IBUF | DICT_SPATIAL
return UNIV_LIKELY(!(type & (DICT_SPATIAL
| DICT_FTS | DICT_CORRUPT)));
}

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -48,10 +48,6 @@ struct dict_add_v_col_t;
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
/* The IDs of the ibuf table and its indexes are assigned as
DICT_IBUF_ID_MIN plus the space id */
#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL
typedef ib_id_t table_id_t;
typedef ib_id_t index_id_t;
@@ -136,13 +132,6 @@ struct table_name_t
inline bool is_temporary() const;
};
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Dump the change buffer at startup */
extern my_bool ibuf_dump;
/** Flag to control insert buffer debugging. */
extern uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/** Shift for spatial status */
#define SPATIAL_STATUS_SHIFT 12

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1247,11 +1247,11 @@ constexpr uint16_t FIL_PAGE_RTREE= 17854;
constexpr uint16_t FIL_PAGE_UNDO_LOG= 2;
/** Index node (of file-in-file metadata) */
constexpr uint16_t FIL_PAGE_INODE= 3;
/** Insert buffer free list */
/** Former change buffer free list */
constexpr uint16_t FIL_PAGE_IBUF_FREE_LIST= 4;
/** Freshly allocated page */
constexpr uint16_t FIL_PAGE_TYPE_ALLOCATED= 0;
/** Change buffer bitmap (pages n*innodb_page_size+1) */
/** Former change buffer bitmap pages (pages n*innodb_page_size+1) */
constexpr uint16_t FIL_PAGE_IBUF_BITMAP= 5;
/** System page */
constexpr uint16_t FIL_PAGE_TYPE_SYS= 6;

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2022, MariaDB Corporation.
Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -157,28 +157,20 @@ this many file pages */
/* This has been replaced with either srv_page_size or page_zip->size. */
/** @name The space low address page map
The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
The 2 pages at FSP_XDES_OFFSET are repeated
every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
/* @{ */
/*--------------------------------------*/
#define FSP_XDES_OFFSET 0U /* !< extent descriptor */
#define FSP_IBUF_BITMAP_OFFSET 1U /* !< insert buffer bitmap */
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
#define FSP_FIRST_INODE_PAGE_NO 2U /*!< in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
#define FSP_IBUF_HEADER_PAGE_NO 3U /*!< insert buffer
#define FSP_IBUF_HEADER_PAGE_NO 3U /*!< former change buffer
header page, in
tablespace 0 */
#define FSP_IBUF_TREE_ROOT_PAGE_NO 4U /*!< insert buffer
#define FSP_IBUF_TREE_ROOT_PAGE_NO 4U /*!< former change buffer
B-tree root page in
tablespace 0 */
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
#define FSP_TRX_SYS_PAGE_NO 5U /*!< transaction
system header, in
tablespace 0 */

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -94,7 +94,29 @@ rtr_page_split_and_insert(
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err); /*!< out: error code */
dberr_t* err, /*!< out: error code */
que_thr_t* thr); /*!< in: query thread */
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts the tuple.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called.
@return inserted record */
rec_t*
rtr_root_raise_and_insert(
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
rec_offs** offsets,/*!< out: offsets on inserted record */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err, /*!< out: error code */
que_thr_t* thr); /*!< in: query thread */
/**************************************************************//**
Sets the child node mbr in a node pointer. */
@@ -205,8 +227,8 @@ rtr_create_rtr_info(
bool init_matches, /*!< in: Whether to initiate the
"matches" structure for collecting
matched leaf records */
btr_cur_t* cursor, /*!< in: tree search cursor */
dict_index_t* index); /*!< in: index struct */
que_thr_t* thr, /*!< in/out: query thread */
btr_cur_t* cursor); /*!< in: tree search cursor */
/********************************************************************//**
Update a btr_cur_t with rtr_info */
@@ -261,8 +283,10 @@ rtr_get_mbr_from_tuple(
about parent nodes in search
@param[in,out] cursor cursor on node pointer record,
its page x-latched
@param[in,out] thr query thread
@return whether the cursor was successfully positioned */
bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor)
bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor,
que_thr_t *thr)
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
/************************************************************//**
@@ -274,11 +298,12 @@ rtr_page_get_father_block(
/*======================*/
rec_offs* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
mtr_t* mtr, /*!< in: mtr */
btr_cur_t* sea_cur,/*!< in: search cursor, contains information
about parent nodes in search */
btr_cur_t* cursor);/*!< out: cursor on node pointer record,
btr_cur_t* cursor, /*!< out: cursor on node pointer record,
its page x-latched */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr); /*!< in/out: mtr */
/**************************************************************//**
Store the parent path cursor
@return number of cursor stored */
@@ -297,10 +322,10 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
bool
rtr_pcur_open(
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
que_thr_t* thr, /*!< in/out; query thread */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((warn_unused_result));

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2021, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -240,6 +240,9 @@ rtr_info_reinit_in_cursor(
bool need_prdt) /*!< in: Whether predicate lock is
needed */
{
que_thr_t* thr = cursor->rtr_info->thr;
ut_ad(thr);
rtr_clean_rtr_info(cursor->rtr_info, false);
rtr_init_rtr_info(cursor->rtr_info, need_prdt, cursor, index, true);
cursor->rtr_info->thr = thr;
}

View File

@@ -1,7 +1,6 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2022, MariaDB Corporation.
Copyright (c) 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,420 +16,40 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
/**************************************************//**
@file include/ibuf0ibuf.h
Insert buffer
#include "db0err.h"
Created 7/19/1997 Heikki Tuuri
*******************************************************/
/* The purpose of the change buffer was to reduce random disk access.
When we wished to
(1) insert a record into a non-unique secondary index,
(2) delete-mark a secondary index record,
(3) delete a secondary index record as part of purge (but not ROLLBACK),
and the B-tree leaf page where the record belongs to is not in the buffer
pool, we inserted a record into the change buffer B-tree, indexed by
the page identifier. When the page was eventually read into the buffer
pool, we looked up the change buffer B-tree for any modifications to the
page, applied these upon the completion of the read operation. This
was called the insert buffer merge.
#ifndef ibuf0ibuf_h
#define ibuf0ibuf_h
There was a hash index of the change buffer B-tree, implemented as the
"change buffer bitmap". Bits in these bitmap pages indicated how full
the page roughly was, and whether any records for the page identifier
exist in the change buffer. The "free" bits had to be updated as part of
operations that modified secondary index leaf pages.
#include "mtr0mtr.h"
#include "dict0mem.h"
#include "fsp0fsp.h"
Because the change buffer has been removed, we will no longer update
any change buffer bitmap pages. Instead, on database startup, we will
check if an upgrade needs to be performed, and apply any buffered
changes if that is the case. Finally, the change buffer will be
transformed to a format that will not be recognized by earlier
versions of MariaDB Server, to prevent downgrades from causing
corruption (due to the removed updates of the bitmap pages) when the
change buffer might be enabled. */
/** Default value for maximum on-disk size of change buffer in terms
of percentage of the buffer pool. */
#define CHANGE_BUFFER_DEFAULT_SIZE (25)
/** Check if ibuf_upgrade() is needed as part of server startup.
@return error code
@retval DB_SUCCESS if no upgrade is needed
@retval DB_FAIL if the change buffer is not empty (need ibuf_upgrade()) */
dberr_t ibuf_upgrade_needed();
/** Operation types that can be buffered in the insert (change) buffer.
See ibuf_insert().
DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
typedef enum {
IBUF_OP_INSERT = 0,
IBUF_OP_DELETE_MARK = 1,
IBUF_OP_DELETE = 2,
/* Number of different operation types; used as an array dimension,
not a valid operation itself. */
IBUF_OP_COUNT = 3
} ibuf_op_t;
/** Combinations of operations that can be buffered.
The trailing comment on each enumerator gives the combination of
operations it stands for.
@see innodb_change_buffering_names */
enum ibuf_use_t {
IBUF_USE_NONE = 0,
IBUF_USE_INSERT, /* insert */
IBUF_USE_DELETE_MARK, /* delete */
IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
IBUF_USE_DELETE, /* delete+purge */
IBUF_USE_ALL /* insert+delete+purge */
};
/** Operations that can currently be buffered. */
extern ulong innodb_change_buffering;
/** Insert buffer control structure: size bookkeeping for the insert
buffer B-tree, plus merge and discard statistics. */
struct ibuf_t{
Atomic_relaxed<ulint> size; /*!< current size of the ibuf index
tree, in pages */
Atomic_relaxed<ulint> max_size; /*!< recommended maximum size of the
ibuf index tree, in pages */
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
bool empty; /*!< true if and only if the insert
buffer tree is empty. Protected by
the page latch of the root page of
the insert buffer tree
(FSP_IBUF_TREE_ROOT_PAGE_NO). */
ulint free_list_len; /*!< length of the free list */
ulint height; /*!< tree height */
dict_index_t* index; /*!< insert buffer index */
/** number of pages merged */
Atomic_counter<ulint> n_merges;
Atomic_counter<ulint> n_merged_ops[IBUF_OP_COUNT];
/*!< number of operations of each type
merged to index pages; one counter
per operation type */
Atomic_counter<ulint> n_discarded_ops[IBUF_OP_COUNT];
/*!< number of operations of each type
discarded without merging due to the
tablespace being deleted or the
index being dropped; one counter
per operation type */
};
/** The insert buffer control structure */
extern ibuf_t ibuf;
/* The purpose of the insert buffer is to reduce random disk access.
When we wish to insert a record into a non-unique secondary index and
the B-tree leaf page where the record belongs to is not in the buffer
pool, we insert the record into the insert buffer B-tree, indexed by
(space_id, page_no). When the page is eventually read into the buffer
pool, we look up the insert buffer B-tree for any modifications to the
page, and apply these upon the completion of the read operation. This
is called the insert buffer merge. */
/* The insert buffer merge must always succeed. To guarantee this,
the insert buffer subsystem keeps track of the free space in pages for
which it can buffer operations. Two bits per page in the insert
buffer bitmap indicate the available space in coarse increments. The
free bits in the insert buffer bitmap must never exceed the free space
on a page. It is safe to decrement or reset the bits in the bitmap in
a mini-transaction that is committed before the mini-transaction that
affects the free space. It is unsafe to increment the bits in a
separately committed mini-transaction, because in crash recovery, the
free bits could momentarily be set too high. */
/******************************************************************//**
Creates the insert buffer data structure at a database startup.
@return DB_SUCCESS or failure */
dberr_t
ibuf_init_at_db_start(void);
/*=======================*/
/*********************************************************************//**
Updates the max_size value for ibuf. */
void
ibuf_max_size_update(
/*=================*/
ulint new_val); /*!< in: new value in terms of
percentage of the buffer pool size */
/*********************************************************************//**
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
void
ibuf_update_max_tablespace_id(void);
/*===============================*/
/***************************************************************//**
Starts an insert buffer mini-transaction. */
UNIV_INLINE
void
ibuf_mtr_start(
/*===========*/
mtr_t* mtr) /*!< out: mini-transaction */
MY_ATTRIBUTE((nonnull));
/***************************************************************//**
Commits an insert buffer mini-transaction. */
UNIV_INLINE
void
ibuf_mtr_commit(
/*============*/
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
/************************************************************************//**
Resets the free bits of the page in the ibuf bitmap. This is done in a
separate mini-transaction, hence this operation does not restrict
further work to only ibuf bitmap operations, which would result if the
latch to the bitmap page were kept. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to decrement or reset the bits in the bitmap in a mini-transaction
that is committed before the mini-transaction that affects the free
space. */
void
ibuf_reset_free_bits(
/*=================*/
buf_block_t* block); /*!< in: index page; free bits are set to 0
if the index is a non-clustered
non-unique, and page level is 0 */
/************************************************************************//**
Updates the free bits of an uncompressed page in the ibuf bitmap if
there is not enough free space on the page any more. This is done in a
separate mini-transaction, hence this operation does not restrict
further work to only ibuf bitmap operations, which would result if the
latch to the bitmap page were kept. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is
unsafe to increment the bits in a separately committed
mini-transaction, because in crash recovery, the free bits could
momentarily be set too high. It is only safe to use this function for
decrementing the free bits. Should more free space become available,
we must not update the free bits here, because that would break crash
recovery. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
buf_block_t* block, /*!< in: index page to which we have added new
records; the free bits are updated if the
index is non-clustered and non-unique and
the page level is 0, and the page becomes
fuller */
ulint max_ins_size,/*!< in: value of maximum insert size with
reorganize before the latest operation
performed to the page */
ulint increase);/*!< in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
/**********************************************************************//**
Updates the free bits for an uncompressed page to reflect the present
state. Does this in the mtr given, which means that the latching
order rules virtually prevent any further operations for this OS
thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
void
ibuf_update_free_bits_low(
/*======================*/
const buf_block_t* block, /*!< in: index page */
ulint max_ins_size, /*!< in: value of
maximum insert size
with reorganize before
the latest operation
performed to the page */
mtr_t* mtr); /*!< in/out: mtr */
/**********************************************************************//**
Updates the free bits for a compressed page to reflect the present
state. Does this in the mtr given, which means that the latching
order rules virtually prevent any further operations for this OS
thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
void
ibuf_update_free_bits_zip(
/*======================*/
buf_block_t* block, /*!< in/out: index page */
mtr_t* mtr); /*!< in/out: mtr */
/**********************************************************************//**
Updates the free bits for the two pages to reflect the present state.
Does this in the mtr given, which means that the latching order rules
virtually prevent any further operations until mtr is committed.
NOTE: The free bits in the insert buffer bitmap must never exceed the
free space on a page. It is safe to set the free bits in the same
mini-transaction that updated the pages. */
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
buf_block_t* block1, /*!< in: index page */
buf_block_t* block2, /*!< in: index page */
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
A basic partial test if an insert to the insert buffer could be possible and
recommended. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
dict_index_t* index, /*!< in: index where to insert */
ulint ignore_sec_unique); /*!< in: if != 0, we should
ignore UNIQUE constraint on
a secondary index when we
decide */
/******************************************************************//**
Returns TRUE if the current OS thread is performing an insert buffer
routine.
For instance, a read-ahead of non-ibuf pages is forbidden by threads
that are executing an insert buffer routine.
@return TRUE if inside an insert buffer routine */
UNIV_INLINE
ibool
ibuf_inside(
/*========*/
const mtr_t* mtr) /*!< in: mini-transaction */
MY_ATTRIBUTE((warn_unused_result));
/** Checks if a page address is an ibuf bitmap page (level 3 page) address.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@return TRUE if a bitmap page */
inline bool ibuf_bitmap_page(const page_id_t page_id, ulint zip_size)
{
ut_ad(ut_is_2pow(zip_size));
ulint size = zip_size ? zip_size : srv_page_size;
return (page_id.page_no() & (size - 1)) == FSP_IBUF_BITMAP_OFFSET;
}
/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
Must not be called when recv_no_ibuf_operations==true.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] x_latch false if relaxed check (avoid latching the
bitmap page); this parameter only exists in UNIV_DEBUG builds
@param[in,out] mtr mini-transaction which will contain an x-latch to the
bitmap page if the page is not one of the fixed address ibuf pages, or NULL,
in which case a new mini-transaction is created
@return true if level 2 or level 3 page */
bool
ibuf_page_low(
const page_id_t page_id,
ulint zip_size,
#ifdef UNIV_DEBUG
bool x_latch,
#endif /* UNIV_DEBUG */
mtr_t* mtr)
MY_ATTRIBUTE((warn_unused_result));
#ifdef UNIV_DEBUG
/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
Must not be called when recv_no_ibuf_operations==true.
@param[in] page_id tablespace/page identifier
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] mtr mini-transaction or NULL
@return TRUE if level 2 or level 3 page */
# define ibuf_page(page_id, zip_size, mtr) \
ibuf_page_low(page_id, zip_size, true, mtr)
#else /* UNIV_DEBUG */
/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
Must not be called when recv_no_ibuf_operations==true.
@param[in] page_id tablespace/page identifier
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] mtr mini-transaction or NULL
@return TRUE if level 2 or level 3 page */
# define ibuf_page(page_id, zip_size, mtr) \
ibuf_page_low(page_id, zip_size, mtr)
#endif /* UNIV_DEBUG */
/***********************************************************************//**
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
void
ibuf_free_excess_pages(void);
/*========================*/
/** Buffer an operation in the change buffer, instead of applying it
directly to the file page, if this is possible. Does not do it if the index
is clustered or unique.
@param[in] op operation type
@param[in] entry index entry to insert
@param[in,out] index index where to insert
@param[in] page_id page id where to insert
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] thr query thread
@return true if success */
bool
ibuf_insert(
ibuf_op_t op,
const dtuple_t* entry,
dict_index_t* index,
const page_id_t page_id,
ulint zip_size,
que_thr_t* thr);
/** Check whether buffered changes exist for a page.
@param[in] id page identifier
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@return whether buffered changes exist */
bool ibuf_page_exists(const page_id_t id, ulint zip_size);
/** When an index page is read from a disk to the buffer pool, this function
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
subsequently was dropped.
@param block X-latched page to try to apply changes to, or NULL to discard
@param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
@return error code */
dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
const page_id_t page_id,
ulint zip_size);
/** Delete all change buffer entries for a tablespace,
in DISCARD TABLESPACE, IMPORT TABLESPACE, or read-ahead.
@param[in] space missing or to-be-discarded tablespace */
void ibuf_delete_for_discarded_space(uint32_t space);
/** Contract the change buffer by reading pages to the buffer pool.
@return a lower limit for the combined size in bytes of entries which
will be merged from ibuf trees to the pages read
@retval 0 if ibuf.empty */
ulint ibuf_contract();
/** Contracts insert buffer trees by reading pages referring to space_id
to the buffer pool.
@returns number of pages merged.*/
ulint
ibuf_merge_space(
/*=============*/
ulint space); /*!< in: space id */
/******************************************************************//**
Looks if the insert buffer is empty.
@return true if empty */
bool
ibuf_is_empty(void);
/*===============*/
/******************************************************************//**
Prints info of ibuf. */
void
ibuf_print(
/*=======*/
FILE* file); /*!< in: file where to print */
/********************************************************************
Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records).
@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
ulint
ibuf_rec_get_counter(
/*=================*/
const rec_t* rec); /*!< in: ibuf record */
/******************************************************************//**
Closes insert buffer and frees the data structures. */
void
ibuf_close(void);
/*============*/
/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
@param[in] trx transaction
@param[in,out] space tablespace being imported
@return DB_SUCCESS or error code */
dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Update free bits and buffered bits for bulk loaded page.
@param block secondary index leaf page
@param mtr mini-transaction
@param reset whether the page is full */
void ibuf_set_bitmap_for_bulk_load(buf_block_t *block, mtr_t *mtr, bool reset);
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
/* The ibuf header page currently contains only the file segment header
for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
/* The insert buffer tree itself is always located in space 0. */
#define IBUF_SPACE_ID static_cast<ulint>(0)
#include "ibuf0ibuf.inl"
#endif
/** Upgrade the change buffer after all redo log has been applied. */
dberr_t ibuf_upgrade();

View File

@@ -1,282 +0,0 @@
/*****************************************************************************
Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file include/ibuf0ibuf.ic
Insert buffer
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#include "page0page.h"
#include "page0zip.h"
#include "fsp0types.h"
#include "buf0lru.h"
/** An index page must contain at least srv_page_size /
IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
buffer inserts to this page. If there is this much of free space, the
corresponding bits are set in the ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
/***************************************************************//**
Begins a mini-transaction and flags it as running inside the insert
buffer code. Redo logging is switched off for the mini-transaction when
either high_level_read_only or srv_read_only_mode is set. */
UNIV_INLINE
void
ibuf_mtr_start(
/*===========*/
	mtr_t*	mtr)	/*!< out: mini-transaction to start */
{
	mtr_start(mtr);
	mtr->enter_ibuf();

	const bool	read_only = high_level_read_only || srv_read_only_mode;

	if (read_only) {
		/* Nothing may be redo-logged in a read-only server. */
		mtr_set_log_mode(mtr, MTR_LOG_NO_REDO);
	}
}
/***************************************************************//**
Finishes an insert buffer mini-transaction: checks that it is flagged as
being inside the insert buffer code, clears that flag, and commits. */
UNIV_INLINE
void
ibuf_mtr_commit(
/*============*/
	mtr_t*	mtr)	/*!< in/out: mini-transaction to commit */
{
	/* The flag is raised by mtr_t::enter_ibuf(), which
	ibuf_mtr_start() invokes. */
	ut_ad(mtr->is_inside_ibuf());

	/* NOTE(review): ut_d() presumably expands to nothing in
	non-debug builds, leaving the flag set there; confirm. */
	ut_d(mtr->exit_ibuf());

	mtr_commit(mtr);
}
/************************************************************************//**
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept.
Callers are expected to go through the ibuf_set_free_bits() macro below,
which hides the debug-only max_val parameter. */
void
ibuf_set_free_bits_func(
/*====================*/
buf_block_t* block, /*!< in: index page of a non-clustered index;
free bit is reset if page level is 0 */
#ifdef UNIV_IBUF_DEBUG
ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
value which the bits must have before
setting; this is for debugging */
#endif /* UNIV_IBUF_DEBUG */
ulint val); /*!< in: value to set: < 4 */
/* ibuf_set_free_bits(b,v,max): b = block, v = value to set, max = maximum
value the bits may have before setting. Note that the macro takes the
arguments as (block, val, max) while the function takes
(block, max_val, val); max is forwarded only when UNIV_IBUF_DEBUG is
defined and is dropped otherwise. */
#ifdef UNIV_IBUF_DEBUG
# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
#else /* UNIV_IBUF_DEBUG */
# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
#endif /* UNIV_IBUF_DEBUG */
/**********************************************************************//**
Cheap, partial pre-check of whether buffering a change to an index in the
insert buffer could be possible and recommended.
@return false if buffering must not be attempted, true otherwise */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
	dict_index_t*	index,			/*!< in: index where to insert */
	ulint		ignore_sec_unique)	/*!< in: if != 0, we should
						ignore UNIQUE constraint on
						a secondary index when we
						decide */
{
	/* Change buffering is switched off, or ibuf.max_size is zero. */
	if (!innodb_change_buffering || !ibuf.max_size) {
		return false;
	}

	/* Clustered and spatial indexes are never buffered. */
	if (index->is_clust() || index->is_spatial()) {
		return false;
	}

	/* Unique indexes are refused unless the caller asked us to
	disregard the UNIQUE constraint. */
	if (!ignore_sec_unique) {
		if (index->is_unique()) {
			return false;
		}
	}

	/* No buffering while the table is being quiesced. */
	if (index->table->quiesce != QUIESCE_NONE) {
		return false;
	}

	/* An index with any descending field is refused. */
	const unsigned	n_fields = index->n_fields;

	for (unsigned pos = 0; pos < n_fields; pos++) {
		if (index->fields[pos].descending) {
			return false;
		}
	}

	return true;
}
/******************************************************************//**
Tells whether the given mini-transaction is flagged as executing an
insert buffer routine.
For instance, a read-ahead of non-ibuf pages is forbidden by threads
that are executing an insert buffer routine.
@return TRUE if inside an insert buffer routine */
UNIV_INLINE
ibool
ibuf_inside(
/*========*/
	const mtr_t*	mtr)	/*!< in: mini-transaction */
{
	const bool	inside = mtr->is_inside_ibuf();

	return inside;
}
/** Convert the free space on a page into the value stored in the
ibuf bitmap. The free space is measured in units of
page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE bytes:
0, 1 or 2 whole units map to themselves, exactly 3 units map to 2,
and anything above 3 units maps to 3.
@param[in] page_size page size in bytes
@param[in] max_ins_size maximum insert size after reorganize for
the page
@return value for ibuf bitmap bits (0 to 3) */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_bits(
	ulint	page_size,
	ulint	max_ins_size)
{
	ut_ad(ut_is_2pow(page_size));
	ut_ad(page_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);

	const ulint	unit_size = page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE;
	const ulint	free_units = max_ins_size / unit_size;

	if (free_units < 3) {
		return free_units;
	}

	/* Exactly three units is reported as 2; the top value 3 is
	reserved for more than three units. */
	return free_units == 3 ? 2 : 3;
}
/*********************************************************************//**
Convert the free space on a compressed page into the value stored in the
ibuf bitmap. The smaller of the uncompressed-page insert limit and the
compressed-page insert limit is used.
@return value for ibuf bitmap bits; 0 if the compressed page reports a
negative maximum insert size */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_zip(
/*==========================*/
	const buf_block_t*	block)	/*!< in: buffer block */
{
	ut_ad(block->page.zip.data);

	/* Use the maximum insert size on the uncompressed page
	without reorganizing it, and make no assumption about the
	compression ratio. If the compressed limit exceeded the
	uncompressed one and a quarter of the page were garbage,
	recompression after a reorganize could in theory fail. Hence
	guarantee that merging a buffered insert into a compressed
	page always succeeds through the page modification log alone,
	with no reorganize or recompression. */
	const ulint	uncompressed_max = page_get_max_insert_size(
		buf_block_get_frame(block), 1);

	const page_zip_des_t*	zip_desc = buf_block_get_page_zip(block);
	const lint		zip_max_ins = page_zip_max_ins_size(
		zip_desc, FALSE/* not clustered */);

	if (zip_max_ins < 0) {
		return 0;
	}

	const ulint	zip_limit = (ulint) zip_max_ins;
	const ulint	max_ins_size = uncompressed_max > zip_limit
		? zip_limit
		: uncompressed_max;

	return ibuf_index_page_calc_free_bits(block->physical_size(),
					      max_ins_size);
}
/*********************************************************************//**
Convert the free space on a page into the value stored in the ibuf bitmap.
A block with a compressed page image is delegated to
ibuf_index_page_calc_free_zip(); otherwise the maximum insert size
after reorganize is used.
@return value for ibuf bitmap bits */
UNIV_INLINE
ulint
ibuf_index_page_calc_free(
/*======================*/
	const buf_block_t*	block)	/*!< in: buffer block */
{
	if (block->page.zip.data) {
		return ibuf_index_page_calc_free_zip(block);
	}

	const ulint	reorganized_max
		= page_get_max_insert_size_after_reorganize(
			buf_block_get_frame(block), 1);

	return ibuf_index_page_calc_free_bits(block->physical_size(),
					      reorganized_max);
}
/************************************************************************//**
Lowers the free bits of an uncompressed page in the ibuf bitmap if the
page no longer has as much free space as the bitmap claims. The bitmap
is written in a separate mini-transaction, so this operation does not
restrict further work to ibuf bitmap operations only, as keeping the
latch on the bitmap page would.
NOTE: The free bits in the insert buffer bitmap must never exceed the
free space on a page. Incrementing the bits in a separately committed
mini-transaction is unsafe, because in crash recovery the free bits
could momentarily be set too high. This function therefore only ever
decrements the bits: should more free space have become available, the
bitmap is left alone, because updating it here would break crash
recovery. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
	buf_block_t*	block,	/*!< in: index page to which we have added new
				records; the free bits are updated if the
				index is non-clustered and non-unique and
				the page level is 0, and the page becomes
				fuller */
	ulint		max_ins_size,/*!< in: value of maximum insert size with
				reorganize before the latest operation
				performed to the page */
	ulint		increase)/*!< in: upper limit for the additional space
				used in the latest operation, if known, or
				ULINT_UNDEFINED */
{
	ut_ad(buf_block_get_page_zip(block) == NULL);

	const ulint	bits_before = ibuf_index_page_calc_free_bits(
		srv_page_size, max_ins_size);
	ulint		bits_after;

	if (max_ins_size < increase) {
		/* The supplied bound cannot be subtracted; derive the
		bits from the page itself. */
		bits_after = ibuf_index_page_calc_free(block);
	} else {
		/* NOTE(review): presumably this guards that an
		"undefined" increase can never pass for a real size;
		confirm why ULINT32_UNDEFINED is the constant checked. */
		compile_time_assert(ULINT32_UNDEFINED > UNIV_PAGE_SIZE_MAX);
		bits_after = ibuf_index_page_calc_free_bits(
			srv_page_size, max_ins_size - increase);
#ifdef UNIV_IBUF_DEBUG
		ut_a(bits_after <= ibuf_index_page_calc_free(block));
#endif
	}

	if (!bits_after) {
		/* Put the page at the front of the buffer pool LRU
		list, so that pages to which no inserts can be made
		through the insert buffer do not slip out of the
		buffer pool. */
		buf_page_make_young(&block->page);
	}

	if (bits_after < bits_before) {
		ibuf_set_free_bits(block, bits_after, bits_before);
	}
}

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -417,16 +417,6 @@ public:
/** The recovery system */
extern recv_sys_t recv_sys;
/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this will be set if
recv_sys.pages becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.
TRUE means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
extern bool recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called. */
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -381,15 +381,6 @@ public:
@retval 0 if the transaction only modified temporary tablespaces */
lsn_t commit_lsn() const { ut_ad(has_committed()); return m_commit_lsn; }
/** Note that we are inside the change buffer code. */
void enter_ibuf() { m_inside_ibuf= true; }
/** Note that we have exited from the change buffer code. */
void exit_ibuf() { m_inside_ibuf= false; }
/** @return true if we are inside the change buffer code */
bool is_inside_ibuf() const { return m_inside_ibuf; }
/** Note that some pages have been freed */
void set_trim_pages() { m_trim_pages= true; }
@@ -792,10 +783,6 @@ private:
/** whether log_sys.latch is locked exclusively */
uint16_t m_latch_ex:1;
/** whether change buffer is latched; only needed in non-debug builds
to suppress some read-ahead operations, @see ibuf_inside() */
uint16_t m_inside_ibuf:1;
/** whether the pages has been trimmed */
uint16_t m_trim_pages:1;

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2022, MariaDB Corporation.
Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -117,11 +117,6 @@ succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -151,11 +146,6 @@ page_cur_insert_rec_low(
Inserts a record next to page cursor on a compressed and uncompressed
page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to inserted record
@return nullptr on failure */
rec_t*

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2022, MariaDB Corporation.
Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -155,11 +155,6 @@ succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*

View File

@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -95,7 +95,7 @@ this byte can be garbage. */
direction */
#define PAGE_N_RECS 16 /* number of user records on the page */
/** The largest DB_TRX_ID that may have modified a record on the page;
Defined only in secondary index leaf pages and in change buffer leaf pages.
Defined only in secondary index leaf pages.
Otherwise written as 0. @see PAGE_ROOT_AUTO_INC */
#define PAGE_MAX_TRX_ID 18
/** The AUTO_INCREMENT value (on persistent clustered index root pages). */
@@ -901,11 +901,6 @@ MY_ATTRIBUTE((nonnull, warn_unused_result))
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page or compress the page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_t::commit().
@return error code */
dberr_t
page_copy_rec_list_end_no_locks(
@@ -920,11 +915,6 @@ Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_t::commit().
@return pointer to the original successor of the infimum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
@@ -942,11 +932,6 @@ Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to the original predecessor of the supremum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*

View File

@@ -2,7 +2,7 @@
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -213,9 +213,9 @@ page_zip_max_ins_size(
/**********************************************************************//**
Determine if enough space is available in the modification log.
@return TRUE if page_zip_write_rec() will succeed */
@return true if page_zip_write_rec() will succeed */
UNIV_INLINE
ibool
bool
page_zip_available(
/*===============*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
@@ -323,10 +323,6 @@ Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, redo log will be written.
The function btr_page_reorganize() should be preferred whenever possible.
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
non-clustered index, the caller must update the insert buffer free
bits in the same mini-transaction in such a way that the modification
will be redo-logged.
@return error code
@retval DB_FAIL on overflow; the block_zip will be left intact */
dberr_t

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -269,7 +269,7 @@ page_zip_max_ins_size(
Determine if enough space is available in the modification log.
@return TRUE if enough space is available */
UNIV_INLINE
ibool
bool
page_zip_available(
/*===============*/
const page_zip_des_t* page_zip,/*!< in: compressed page */

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1095,9 +1095,7 @@ rec_get_converted_size(
ut_ad(dtuple_check_typed(dtuple));
#ifdef UNIV_DEBUG
if (dict_index_is_ibuf(index)) {
ut_ad(dtuple->n_fields > 1);
} else if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
== REC_STATUS_NODE_PTR) {
ut_ad(dtuple->n_fields - 1
== dict_index_get_n_unique_in_tree_nonleaf(index));

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,39 +37,6 @@ Created 3/14/1997 Heikki Tuuri
#include <queue>
class MDL_ticket;
/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
this function first returns true and then false, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@param[in,out] node row purge node
@param[in] index secondary index
@param[in] entry secondary index entry
@param[in,out] sec_pcur secondary index cursor or NULL
if it is called for purge buffering
operation.
@param[in,out] sec_mtr mini-transaction which holds
secondary index entry or NULL if it is
called for purge buffering operation.
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return true if the secondary index record can be purged */
bool
row_purge_poss_sec(
purge_node_t* node,
dict_index_t* index,
const dtuple_t* entry,
btr_pcur_t* sec_pcur=NULL,
mtr_t* sec_mtr=NULL,
bool is_tree=false);
/***************************************************************
Does the purge operation.

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2022, MariaDB Corporation.
Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +28,6 @@ Created 4/20/1996 Heikki Tuuri
#define row0row_h
#include "que0types.h"
#include "ibuf0ibuf.h"
#include "trx0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
@@ -344,31 +343,19 @@ row_parse_int(
ulint mtype,
bool unsigned_type);
/** Result of row_search_index_entry */
enum row_search_result {
ROW_FOUND = 0, /*!< the record was found */
ROW_NOT_FOUND, /*!< record not found */
ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or
BTR_DELETE_MARK was specified, the
secondary index leaf page was not in
the buffer pool, and the operation was
enqueued in the insert/delete buffer */
ROW_NOT_DELETED_REF /*!< BTR_DELETE was specified, and
row_purge_poss_sec() failed */
};
/***************************************************************//**
Searches an index record.
@return whether the record was found or buffered */
enum row_search_result
@return whether the record was found */
bool
row_search_index_entry(
/*===================*/
const dtuple_t* entry, /*!< in: index entry */
btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull, warn_unused_result));
MY_ATTRIBUTE((nonnull(1,3,5), warn_unused_result));
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
@@ -398,22 +385,17 @@ row_raw_format(
in bytes */
MY_ATTRIBUTE((nonnull, warn_unused_result));
#include "dict0mem.h"
/** Prepare to start a mini-transaction to modify an index.
@param[in,out] mtr mini-transaction
@param[in,out] index possibly secondary index
@param[in] pessimistic whether this is a pessimistic operation */
inline
void
row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
@param[in,out] index possibly secondary index */
inline void row_mtr_start(mtr_t* mtr, dict_index_t* index)
{
mtr->start();
switch (index->table->space_id) {
case IBUF_SPACE_ID:
if (pessimistic
&& !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
ibuf_free_excess_pages();
}
case 0:
break;
case SRV_TMP_SPACE_ID:
mtr->set_log_mode(MTR_LOG_NO_REDO);

View File

@@ -2,7 +2,7 @@
Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -228,12 +228,8 @@ enum monitor_id_t {
MONITOR_MODULE_BUF_PAGE,
MONITOR_INDEX_LEAF_PAGE_READ,
MONITOR_INDEX_NON_LEAF_PAGE_READ,
MONITOR_INDEX_IBUF_LEAF_PAGE_READ,
MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ,
MONITOR_UNDO_LOG_PAGE_READ,
MONITOR_INODE_PAGE_READ,
MONITOR_IBUF_FREELIST_PAGE_READ,
MONITOR_IBUF_BITMAP_PAGE_READ,
MONITOR_SYSTEM_PAGE_READ,
MONITOR_TRX_SYSTEM_PAGE_READ,
MONITOR_FSP_HDR_PAGE_READ,
@@ -244,12 +240,8 @@ enum monitor_id_t {
MONITOR_OTHER_PAGE_READ,
MONITOR_INDEX_LEAF_PAGE_WRITTEN,
MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN,
MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN,
MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN,
MONITOR_UNDO_LOG_PAGE_WRITTEN,
MONITOR_INODE_PAGE_WRITTEN,
MONITOR_IBUF_FREELIST_PAGE_WRITTEN,
MONITOR_IBUF_BITMAP_PAGE_WRITTEN,
MONITOR_SYSTEM_PAGE_WRITTEN,
MONITOR_TRX_SYSTEM_PAGE_WRITTEN,
MONITOR_FSP_HDR_PAGE_WRITTEN,
@@ -347,17 +339,6 @@ enum monitor_id_t {
MONITOR_MODULE_FIL_SYSTEM,
MONITOR_OVLD_N_FILE_OPENED,
/* InnoDB Change Buffer related counters */
MONITOR_MODULE_IBUF_SYSTEM,
MONITOR_OVLD_IBUF_MERGE_INSERT,
MONITOR_OVLD_IBUF_MERGE_DELETE,
MONITOR_OVLD_IBUF_MERGE_PURGE,
MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT,
MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE,
MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE,
MONITOR_OVLD_IBUF_MERGES,
MONITOR_OVLD_IBUF_SIZE,
/* Counters for server operations */
MONITOR_MODULE_SERVER,
MONITOR_MASTER_THREAD_SLEEP,

View File

@@ -170,9 +170,9 @@ extern char* srv_data_home;
recovery and open all tables in RO mode instead of RW mode. We don't
sync the max trx id to disk either. */
extern my_bool srv_read_only_mode;
/** Set if InnoDB operates in read-only mode or innodb-force-recovery
is greater than SRV_FORCE_NO_IBUF_MERGE. */
extern my_bool high_level_read_only;
/** Set if innodb_read_only is set or innodb_force_recovery
is SRV_FORCE_NO_UNDO_LOG_SCAN or greater. */
extern bool high_level_read_only;
/** store to its own file each table created by a user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
@@ -585,11 +585,6 @@ void srv_monitor_task(void*);
void srv_master_callback(void*);
/**
Complete the shutdown tasks such as background DROP TABLE,
and optionally change buffer merge (on innodb_fast_shutdown=0). */
void srv_shutdown(bool ibuf_merge);
} /* extern "C" */
#ifdef UNIV_DEBUG

View File

@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2020, 2022, MariaDB Corporation.
Copyright (c) 2020, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -149,7 +149,7 @@ private:
#endif
public:
/** In crash recovery or the change buffer, claim the ownership
/** In crash recovery, claim the ownership
of the exclusive block lock to the current thread */
void claim_ownership() { set_new_owner(pthread_self()); }

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2022, MariaDB Corporation.
Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -774,13 +774,17 @@ public:
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
uint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
bool check_foreigns; /*!< normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
/** TRX_ISO_REPEATABLE_READ, ... */
unsigned isolation_level:2;
/** normally set; "SET foreign_key_checks=0" can be issued to suppress
foreign key checks, in table imports, for example */
unsigned check_foreigns:1;
/** normally set; "SET unique_checks=0, foreign_key_checks=0"
enables bulk insert into an empty table */
unsigned check_unique_secondary:1;
/** whether an insert into an empty table is active */
bool bulk_insert;
unsigned bulk_insert:1;
/*------------------------------*/
/* MySQL has a transaction coordinator to coordinate two phase
commit between multiple storage engines and the binary log. When
@@ -794,13 +798,6 @@ public:
/** whether this is holding the prepare mutex */
bool active_commit_ordered;
/*------------------------------*/
bool check_unique_secondary;
/*!< normally TRUE, but if the user
wants to speed up inserts by
suppressing unique key checks
for secondary indexes when we decide
if we can use the insert buffer for
them, we set this FALSE */
bool flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -142,7 +142,6 @@ using the call command. */
assertions. */
#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
#define UNIV_HASH_DEBUG /* debug HASH_ macros */
#define UNIV_IBUF_DEBUG /* debug the insert buffer */
#define UNIV_PERF_DEBUG /* debug flag that enables
light weight performance
related stuff. */
@@ -475,9 +474,6 @@ extern mysql_pfs_key_t fts_cache_mutex_key;
extern mysql_pfs_key_t fts_cache_init_mutex_key;
extern mysql_pfs_key_t fts_delete_mutex_key;
extern mysql_pfs_key_t fts_doc_id_mutex_key;
extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
extern mysql_pfs_key_t ibuf_mutex_key;
extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
extern mysql_pfs_key_t recalc_pool_mutex_key;
extern mysql_pfs_key_t purge_sys_pq_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;

View File

@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Copyright (c) 2014, 2022, MariaDB Corporation.
Copyright (c) 2014, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -913,15 +913,6 @@ void log_write_up_to(lsn_t lsn, bool durable,
{
ut_ad(!srv_read_only_mode);
ut_ad(lsn != LSN_MAX);
if (UNIV_UNLIKELY(recv_no_ibuf_operations))
{
/* A non-final batch of recovery is active; no writes to the log
are allowed yet. */
ut_a(!callback);
return;
}
ut_ad(lsn <= log_sys.get_lsn());
#ifdef HAVE_PMEM
@@ -947,6 +938,7 @@ repeat:
if (write_lock.acquire(lsn, durable ? nullptr : callback) ==
group_commit_lock::ACQUIRED)
{
ut_ad(!recv_no_log_write || srv_operation != SRV_OPERATION_NORMAL);
log_sys.latch.wr_lock(SRW_LOCK_CALL);
pending_write_lsn= write_lock.release(log_sys.write_buf<true>());
}
@@ -1079,11 +1071,9 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
ib::info() << "Starting shutdown...";
/* Wait until the master thread and all other operations are idle: our
/* Wait until the master task and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
bool do_srv_shutdown = false;
if (srv_master_timer) {
do_srv_shutdown = srv_fast_shutdown < 2;
srv_master_timer.reset();
}
@@ -1100,11 +1090,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
}
srv_monitor_timer.reset();
if (do_srv_shutdown) {
srv_shutdown(srv_fast_shutdown == 0);
}
loop:
ut_ad(lock_sys.is_initialised() || !srv_was_started);
ut_ad(log_sys.is_initialised() || !srv_was_started);

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2022, MariaDB Corporation.
Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,6 @@ Created 9/20/1997 Heikki Tuuri
#include "page0page.h"
#include "page0cur.h"
#include "trx0undo.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0rec.h"
#include "fil0fil.h"
@@ -71,17 +70,6 @@ number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
recv_recovery_from_checkpoint_start(). */
bool recv_lsn_checks_on;
/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.
true means that recovery is running and no operations on the log file
are allowed yet: the variable name is misleading. */
bool recv_no_ibuf_operations;
/** The maximum lsn we see for a page during the recovery process. If this
is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt. */
@@ -1089,64 +1077,11 @@ public:
void reset()
{
mysql_mutex_assert_owner(&recv_sys.mutex);
ut_ad(recv_no_ibuf_operations);
for (map::value_type& i : inits) {
i.second.created = false;
}
}
/** On the last recovery batch, mark whether there exist
buffered changes for the pages that were initialized
by buf_page_create() and still reside in the buffer pool.
@param[in,out] mtr dummy mini-transaction */
void mark_ibuf_exist(mtr_t& mtr)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
mtr.start();
for (const map::value_type& i : inits) {
if (!i.second.created) {
continue;
}
if (buf_block_t* block = buf_page_get_low(
i.first, 0, RW_X_LATCH, nullptr,
BUF_GET_IF_IN_POOL,
&mtr, nullptr, false)) {
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
switch (fil_page_get_type(
block->page.zip.data)) {
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
if (page_zip_decompress(
&block->page.zip,
block->page.frame,
true)) {
break;
}
ib::error() << "corrupted "
<< block->page.id();
}
}
if (recv_no_ibuf_operations) {
mtr.commit();
mtr.start();
continue;
}
mysql_mutex_unlock(&recv_sys.mutex);
if (ibuf_page_exists(block->page.id(),
block->zip_size())) {
block->page.set_ibuf_exist();
}
mtr.commit();
mtr.start();
mysql_mutex_lock(&recv_sys.mutex);
}
}
mtr.commit();
clear();
}
/** Clear the data structure */
void clear() { inits.clear(); }
};
@@ -3436,10 +3371,6 @@ void recv_sys_t::apply(bool last_batch)
}
}
recv_no_ibuf_operations = !last_batch ||
srv_operation == SRV_OPERATION_RESTORE ||
srv_operation == SRV_OPERATION_RESTORE_EXPORT;
mtr_t mtr;
if (!pages.empty())
@@ -3580,10 +3511,7 @@ next_free_block:
}
}
if (last_batch)
/* We skipped this in buf_page_create(). */
mlog_init.mark_ibuf_exist(mtr);
else
if (!last_batch)
{
mlog_init.reset();
log_sys.latch.wr_unlock();
@@ -4298,7 +4226,6 @@ err_exit:
mysql_mutex_lock(&recv_sys.mutex);
recv_sys.apply_log_recs = true;
recv_no_ibuf_operations = false;
ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
if (srv_operation == SRV_OPERATION_NORMAL) {

View File

@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -175,7 +175,6 @@ void mtr_t::start()
m_made_dirty= false;
m_latch_ex= false;
m_inside_ibuf= false;
m_modifications= false;
m_log_mode= MTR_LOG_ALL;
ut_d(m_user_space_id= TRX_SYS_SPACE);
@@ -323,7 +322,6 @@ void mtr_t::release()
void mtr_t::commit()
{
ut_ad(is_active());
ut_ad(!is_inside_ibuf());
/* This is a dirty read, for debugging. */
ut_ad(!m_modifications || !recv_no_log_write);
@@ -506,7 +504,6 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end)
void mtr_t::commit_shrink(fil_space_t &space)
{
ut_ad(is_active());
ut_ad(!is_inside_ibuf());
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(m_made_dirty);
@@ -613,7 +610,6 @@ void mtr_t::commit_shrink(fil_space_t &space)
bool mtr_t::commit_file(fil_space_t &space, const char *name)
{
ut_ad(is_active());
ut_ad(!is_inside_ibuf());
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(!m_made_dirty);
@@ -724,7 +720,6 @@ lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn)
ut_ad(log_sys.latch.is_write_locked());
#endif
ut_ad(is_active());
ut_ad(!is_inside_ibuf());
ut_ad(m_log_mode == MTR_LOG_ALL);
ut_ad(!m_made_dirty);
ut_ad(!m_memo);

View File

@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2018, 2022, MariaDB Corporation.
Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1369,8 +1369,7 @@ page_cur_insert_rec_low(
ut_ad(!!page_is_comp(block->page.frame) == !!rec_offs_comp(offsets));
ut_ad(fil_page_index_page_check(block->page.frame));
ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->page.frame) ==
index->id ||
mtr->is_inside_ibuf());
index->id || index->is_dummy);
ut_ad(page_dir_get_n_slots(block->page.frame) >= 2);
ut_ad(!page_rec_is_supremum(cur->rec));
@@ -1769,11 +1768,6 @@ static inline void page_zip_dir_add_slot(buf_block_t *block,
Inserts a record next to page cursor on a compressed and uncompressed
page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to inserted record
@return nullptr on failure */
rec_t*
@@ -1797,8 +1791,7 @@ page_cur_insert_rec_zip(
ut_ad(rec_offs_comp(offsets));
ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX ||
fil_page_get_type(page) == FIL_PAGE_RTREE);
ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + page) ==
index->id || mtr->is_inside_ibuf());
ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + page) == index->id);
ut_ad(!page_get_instant(page));
ut_ad(!page_cur_is_after_last(cursor));
#ifdef UNIV_ZIP_DEBUG
@@ -2265,8 +2258,7 @@ page_cur_delete_rec(
== index->table->not_redundant());
ut_ad(fil_page_index_page_check(block->page.frame));
ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->page.frame)
== index->id
|| mtr->is_inside_ibuf());
== index->id);
ut_ad(mtr->is_named_space(index->table->space));
/* The record must not be the supremum or infimum record. */

View File

@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2017, 2022, MariaDB Corporation.
Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -336,17 +336,13 @@ page_create_zip(
/* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for
temporary tables. */
ut_ad(max_trx_id == 0 || !index->table->is_temporary());
/* In secondary indexes and the change buffer, PAGE_MAX_TRX_ID
/* In secondary indexes, PAGE_MAX_TRX_ID
must be zero on non-leaf pages. max_trx_id can be 0 when the
index consists of an empty root (leaf) page. */
ut_ad(max_trx_id == 0
|| level == 0
|| !dict_index_is_sec_or_ibuf(index)
|| index->table->is_temporary());
/* In the clustered index, PAGE_ROOT_AUTOINC or
index consists of an empty root (leaf) page.
In the clustered index, PAGE_ROOT_AUTO_INC or
PAGE_MAX_TRX_ID must be 0 on other pages than the root. */
ut_ad(level == 0 || max_trx_id == 0
|| !dict_index_is_sec_or_ibuf(index)
ut_ad(max_trx_id == 0 || level == 0 || index->is_primary()
|| index->table->is_temporary());
buf_block_modify_clock_inc(block);
@@ -390,8 +386,7 @@ page_create_empty(
same temp-table in parallel.
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (dict_index_is_sec_or_ibuf(index)
&& !index->table->is_temporary()
if (!index->is_primary() && !index->table->is_temporary()
&& page_is_leaf(block->page.frame)) {
max_trx_id = page_get_max_trx_id(block->page.frame);
ut_ad(max_trx_id);
@@ -435,11 +430,6 @@ page_create_empty(
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page or compress the page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return error code */
dberr_t
page_copy_rec_list_end_no_locks(
@@ -507,11 +497,6 @@ Copies records from page to new_page, from a given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_t::commit().
@return pointer to the original successor of the infimum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
@@ -603,8 +588,7 @@ err_exit:
same temp-table in parallel.
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (dict_index_is_sec_or_ibuf(index)
&& page_is_leaf(page)
if (!index->is_primary() && page_is_leaf(page)
&& !index->table->is_temporary()) {
ut_ad(!was_empty || page_dir_get_n_heap(new_page)
== PAGE_HEAP_NO_USER_LOW
@@ -677,11 +661,6 @@ Copies records from page to new_page, up to the given record,
NOT including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if new_block is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return pointer to the original predecessor of the supremum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
@@ -786,8 +765,7 @@ corrupted:
same temp-table in parallel.
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (n_core && dict_index_is_sec_or_ibuf(index)
&& !index->table->is_temporary()) {
if (n_core && !index->is_primary() && !index->table->is_temporary()) {
page_update_max_trx_id(new_block,
new_page_zip,
page_get_max_trx_id(block->page.frame),
@@ -2059,7 +2037,7 @@ func_exit2:
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (!page_is_leaf(page) || page_is_empty(page)
|| !dict_index_is_sec_or_ibuf(index)
|| index->is_primary()
|| index->table->is_temporary()) {
} else if (trx_id_t sys_max_trx_id = trx_sys.get_max_trx_id()) {
trx_id_t max_trx_id = page_get_max_trx_id(page);

Some files were not shown because too many files have changed in this diff Show More