mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-11336: Enable defragmentation on 10.2 when tests pass
The problem was that we could take page latches in a different order than what is permitted under the SX-lock. To follow the latching order defined in WL#6326, acquire index->lock X-latch. This entitles us to acquire page latches in any order for the index. btr0btr.cc: Document latch rules before and after MariaDB 10.2.2. sync0rw.cc: Document latch compatibility rules better. btr_defragment_merge_pages: Fix parameter value. btr_defragment_thread: Acquire X-lock to dict_index_t::lock before restoring cursor position and continuing defragmentation. ha_innobase::optimize: Restore defragment feature. Testing: Add GIS-index and FT-index to table being defragmented. Defragmentation is not done to GIS-indexes and FT auxiliary tables.
This commit is contained in:
@ -10,14 +10,5 @@
|
||||
#
|
||||
##############################################################################
|
||||
|
||||
innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.defrag_mdl-9155 : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defrag_concurrent : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defrag_stats : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defrag_stats_many_tables : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defragment : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defragment_small : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb.innodb_defrag_binlog : MDEV-11336 Fix and enable innodb_defragment
|
||||
innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2
|
||||
create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails
|
||||
|
@ -3,7 +3,15 @@ select @@global.innodb_stats_persistent;
|
||||
@@global.innodb_stats_persistent
|
||||
0
|
||||
set global innodb_defragment_stats_accuracy = 80;
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
|
||||
b VARCHAR(256),
|
||||
c INT,
|
||||
g GEOMETRY NOT NULL,
|
||||
t VARCHAR(256),
|
||||
KEY second(a, b),
|
||||
KEY third(c),
|
||||
SPATIAL gk(g),
|
||||
FULLTEXT INDEX fti(t)) ENGINE=INNODB;
|
||||
connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
||||
connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
||||
connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
||||
@ -40,9 +48,9 @@ count(stat_value) > 0
|
||||
connection con1;
|
||||
optimize table t1;;
|
||||
connection default;
|
||||
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);;
|
||||
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');;
|
||||
connection con2;
|
||||
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);;
|
||||
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');;
|
||||
connection con3;
|
||||
DELETE FROM t1 where a between 1 and 100;;
|
||||
connection con4;
|
||||
@ -59,6 +67,9 @@ disconnect con4;
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
check table t1 extended;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 check status OK
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
15723
|
||||
|
@ -16,7 +16,26 @@ select @@global.innodb_stats_persistent;
|
||||
set global innodb_defragment_stats_accuracy = 80;
|
||||
|
||||
# Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
|
||||
#
|
||||
# TODO: Currently we do not defragment spatial indexes,
|
||||
# because doing it properly would require
|
||||
# appropriate logic around the SSN (split
|
||||
# sequence number).
|
||||
#
|
||||
# Also do not defragment auxiliary tables related to FULLTEXT INDEX.
|
||||
#
|
||||
# Both types added to this test to make sure they do not cause
|
||||
# problems.
|
||||
#
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
|
||||
b VARCHAR(256),
|
||||
c INT,
|
||||
g GEOMETRY NOT NULL,
|
||||
t VARCHAR(256),
|
||||
KEY second(a, b),
|
||||
KEY third(c),
|
||||
SPATIAL gk(g),
|
||||
FULLTEXT INDEX fti(t)) ENGINE=INNODB;
|
||||
|
||||
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
@ -36,7 +55,7 @@ let $i = $data_size;
|
||||
while ($i)
|
||||
{
|
||||
eval
|
||||
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i);
|
||||
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i, Point($i,$i), 'This is a test message.');
|
||||
dec $i;
|
||||
}
|
||||
--enable_query_log
|
||||
@ -69,10 +88,10 @@ connection con1;
|
||||
--send optimize table t1;
|
||||
|
||||
connection default;
|
||||
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);
|
||||
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');
|
||||
|
||||
connection con2;
|
||||
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);
|
||||
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');
|
||||
|
||||
connection con3;
|
||||
--send DELETE FROM t1 where a between 1 and 100;
|
||||
@ -103,6 +122,7 @@ disconnect con3;
|
||||
disconnect con4;
|
||||
|
||||
optimize table t1;
|
||||
check table t1 extended;
|
||||
|
||||
select count(*) from t1;
|
||||
select count(*) from t1 force index (second);
|
||||
|
@ -77,22 +77,85 @@ btr_corruption_report(
|
||||
/*
|
||||
Latching strategy of the InnoDB B-tree
|
||||
--------------------------------------
|
||||
A tree latch protects all non-leaf nodes of the tree. Each node of a tree
|
||||
also has a latch of its own.
|
||||
|
||||
A B-tree operation normally first acquires an S-latch on the tree. It
|
||||
searches down the tree and releases the tree latch when it has the
|
||||
leaf node latch. To save CPU time we do not acquire any latch on
|
||||
non-leaf nodes of the tree during a search, those pages are only bufferfixed.
|
||||
Node pointer page latches acquisition is protected by index->lock latch.
|
||||
|
||||
If an operation needs to restructure the tree, it acquires an X-latch on
|
||||
the tree before searching to a leaf node. If it needs, for example, to
|
||||
split a leaf,
|
||||
(1) InnoDB decides the split point in the leaf,
|
||||
(2) allocates a new page,
|
||||
(3) inserts the appropriate node pointer to the first non-leaf level,
|
||||
(4) releases the tree X-latch,
|
||||
(5) and then moves records from the leaf to the new allocated page.
|
||||
Before MariaDB 10.2.2, all node pointer pages were protected by index->lock
|
||||
either in S (shared) or X (exclusive) mode and block->lock was not acquired on
|
||||
node pointer pages.
|
||||
|
||||
After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect
|
||||
node pointer pages and acquisition of node pointer page latches is protected by
|
||||
index->lock.
|
||||
|
||||
(0) Definition: B-tree level.
|
||||
|
||||
(0.1) The leaf pages of the B-tree are at level 0.
|
||||
|
||||
(0.2) The parent of a page at level L has level L+1. (The level of the
|
||||
root page is equal to the tree height.)
|
||||
|
||||
(0.3) The B-tree lock (index->lock) is the parent of the root page and
|
||||
has a level = tree height + 1.
|
||||
|
||||
Index->lock has 3 possible locking modes:
|
||||
|
||||
(1) S-latch:
|
||||
|
||||
(1.1) All latches for pages must be obtained in descending order of tree level.
|
||||
|
||||
(1.2) Before obtaining the first node pointer page latch at a given B-tree
|
||||
level, the parent latch must be held (at level + 1).
|
||||
|
||||
(1.3) If a node pointer page is already latched at the same level
|
||||
we can only obtain a latch on its right sibling page at the same level.
|
||||
|
||||
(1.4) Release of the node pointer page latches must be done in
|
||||
child-to-parent order. (Prevents deadlocks when index->lock is obtained
|
||||
in SX mode).
|
||||
|
||||
(1.4.1) Level L node pointer page latch can be released only when
|
||||
no latches at children level, i.e. level < L, are held.
|
||||
|
||||
(1.4.2) All latches from node pointer pages must be released so
|
||||
that no latches are obtained between.
|
||||
|
||||
(1.5) [implied by (1.1), (1.2)] Root page latch must be first node pointer
|
||||
latch obtained.
|
||||
|
||||
(2) SX-latch:
|
||||
|
||||
In this case rules (1.2) and (1.3) from S-latch case are relaxed and
|
||||
merged into (2.2) and rule (1.4) is removed. Thus, latch acquisition
|
||||
can be skipped at some tree levels and latches can be obtained in
|
||||
a less restricted order.
|
||||
|
||||
(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending
|
||||
order of tree level.
|
||||
|
||||
(2.2) When a node pointer latch at level L is obtained,
|
||||
the left sibling page latch in the same level or some ancestor
|
||||
page latch (at level > L) must be held.
|
||||
|
||||
(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can
|
||||
be any node pointer page.
|
||||
|
||||
(3) X-latch:
|
||||
|
||||
Node pointer latches can be obtained in any order.
|
||||
|
||||
NOTE: The new rules after MariaDB 10.2.2 do not affect the latching rules of leaf pages:
|
||||
|
||||
index->lock S-latch is needed in read for the node pointer traversal. When the leaf
|
||||
level is reached, index->lock can be released (and with the MariaDB 10.2.2 changes, all
|
||||
node pointer latches). Left to right index traversal in leaf page level can be safely done
|
||||
by obtaining right sibling leaf page latch and then releasing the old page latch.
|
||||
|
||||
Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock
|
||||
S-latch.
|
||||
|
||||
B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page
|
||||
allocations are protected by index->lock X-latch.
|
||||
|
||||
Node pointers
|
||||
-------------
|
||||
|
@ -564,7 +564,7 @@ btr_defragment_merge_pages(
|
||||
page_get_infimum_rec(from_page));
|
||||
node_ptr = dict_index_build_node_ptr(
|
||||
index, rec, page_get_page_no(from_page),
|
||||
heap, level + 1);
|
||||
heap, level);
|
||||
btr_insert_on_non_leaf_level(0, index, level+1,
|
||||
node_ptr, mtr);
|
||||
}
|
||||
@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
|
||||
|
||||
now = ut_timer_now();
|
||||
mtr_start(&mtr);
|
||||
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
|
||||
cursor = btr_pcur_get_btr_cur(pcur);
|
||||
index = btr_cur_get_index(cursor);
|
||||
first_block = btr_cur_get_block(cursor);
|
||||
mtr.set_named_space(index->space);
|
||||
/* To follow the latching order defined in WL#6326, acquire index->lock X-latch.
|
||||
This entitles us to acquire page latches in any order for the index. */
|
||||
mtr_x_lock(&index->lock, &mtr);
|
||||
/* This will acquire index->lock SX-latch, which per WL#6363 is allowed
|
||||
when we are already holding the X-latch. */
|
||||
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
|
||||
first_block = btr_cur_get_block(cursor);
|
||||
|
||||
last_block = btr_defragment_n_pages(first_block, index,
|
||||
srv_defragment_n_pages,
|
||||
|
@ -15066,7 +15066,7 @@ ha_innobase::optimize(
|
||||
calls to OPTIMIZE, which is undesirable. */
|
||||
|
||||
/* TODO: Defragment is disabled for now */
|
||||
if (0) {
|
||||
if (srv_defragment) {
|
||||
int err;
|
||||
|
||||
err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
|
||||
|
@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
|
||||
2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
|
||||
|
||||
LOCK COMPATIBILITY MATRIX
|
||||
S SX X
|
||||
S + + -
|
||||
SX + - -
|
||||
X - - -
|
||||
|
||||
| S|SX| X|
|
||||
--+--+--+--+
|
||||
S| +| +| -|
|
||||
--+--+--+--+
|
||||
SX| +| -| -|
|
||||
--+--+--+--+
|
||||
X| -| -| -|
|
||||
--+--+--+--+
|
||||
|
||||
The lock_word is always read and updated atomically and consistently, so that
|
||||
it always represents the state of the lock, and the state of the lock changes
|
||||
|
Reference in New Issue
Block a user