mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-11336: Enable defragmentation on 10.2 when tests pass
Problem was that we could take page latches in a different order than what is permitted with an SX-lock. To follow the latching order defined in WL#6326, acquire index->lock X-latch. This entitles us to acquire page latches in any order for the index. btr0btr.cc: Document latch rules before and after MariaDB 10.2.2. sync0rw.cc: Document latch compatibility rules better. btr_defragment_merge_pages: Fix parameter value. btr_defragment_thread: Acquire X-lock to dict_index_t::lock before restoring cursor position and continuing defragmentation. ha_innobase::optimize: Restore defragment feature. Testing: Add GIS-index and FT-index to table being defragmented. Defragmentation is not done to GIS-indexes and FT auxiliary tables.
This commit is contained in:
@ -10,14 +10,5 @@
|
|||||||
#
|
#
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.defrag_mdl-9155 : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defrag_concurrent : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defrag_stats : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defrag_stats_many_tables : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defragment : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defragment_small : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb.innodb_defrag_binlog : MDEV-11336 Fix and enable innodb_defragment
|
|
||||||
innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2
|
innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2
|
||||||
create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails
|
create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails
|
||||||
|
@ -3,7 +3,15 @@ select @@global.innodb_stats_persistent;
|
|||||||
@@global.innodb_stats_persistent
|
@@global.innodb_stats_persistent
|
||||||
0
|
0
|
||||||
set global innodb_defragment_stats_accuracy = 80;
|
set global innodb_defragment_stats_accuracy = 80;
|
||||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
|
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
b VARCHAR(256),
|
||||||
|
c INT,
|
||||||
|
g GEOMETRY NOT NULL,
|
||||||
|
t VARCHAR(256),
|
||||||
|
KEY second(a, b),
|
||||||
|
KEY third(c),
|
||||||
|
SPATIAL gk(g),
|
||||||
|
FULLTEXT INDEX fti(t)) ENGINE=INNODB;
|
||||||
connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
||||||
connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
||||||
connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
|
||||||
@ -40,9 +48,9 @@ count(stat_value) > 0
|
|||||||
connection con1;
|
connection con1;
|
||||||
optimize table t1;;
|
optimize table t1;;
|
||||||
connection default;
|
connection default;
|
||||||
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);;
|
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');;
|
||||||
connection con2;
|
connection con2;
|
||||||
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);;
|
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');;
|
||||||
connection con3;
|
connection con3;
|
||||||
DELETE FROM t1 where a between 1 and 100;;
|
DELETE FROM t1 where a between 1 and 100;;
|
||||||
connection con4;
|
connection con4;
|
||||||
@ -59,6 +67,9 @@ disconnect con4;
|
|||||||
optimize table t1;
|
optimize table t1;
|
||||||
Table Op Msg_type Msg_text
|
Table Op Msg_type Msg_text
|
||||||
test.t1 optimize status OK
|
test.t1 optimize status OK
|
||||||
|
check table t1 extended;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t1 check status OK
|
||||||
select count(*) from t1;
|
select count(*) from t1;
|
||||||
count(*)
|
count(*)
|
||||||
15723
|
15723
|
||||||
|
@ -16,7 +16,26 @@ select @@global.innodb_stats_persistent;
|
|||||||
set global innodb_defragment_stats_accuracy = 80;
|
set global innodb_defragment_stats_accuracy = 80;
|
||||||
|
|
||||||
# Create table.
|
# Create table.
|
||||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
|
#
|
||||||
|
# TODO: Currently we do not defragment spatial indexes,
|
||||||
|
# because doing it properly would require
|
||||||
|
# appropriate logic around the SSN (split
|
||||||
|
# sequence number).
|
||||||
|
#
|
||||||
|
# Also do not defragment auxiliary tables related to FULLTEXT INDEX.
|
||||||
|
#
|
||||||
|
# Both types added to this test to make sure they do not cause
|
||||||
|
# problems.
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
b VARCHAR(256),
|
||||||
|
c INT,
|
||||||
|
g GEOMETRY NOT NULL,
|
||||||
|
t VARCHAR(256),
|
||||||
|
KEY second(a, b),
|
||||||
|
KEY third(c),
|
||||||
|
SPATIAL gk(g),
|
||||||
|
FULLTEXT INDEX fti(t)) ENGINE=INNODB;
|
||||||
|
|
||||||
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||||
connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||||
@ -36,7 +55,7 @@ let $i = $data_size;
|
|||||||
while ($i)
|
while ($i)
|
||||||
{
|
{
|
||||||
eval
|
eval
|
||||||
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i);
|
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i, Point($i,$i), 'This is a test message.');
|
||||||
dec $i;
|
dec $i;
|
||||||
}
|
}
|
||||||
--enable_query_log
|
--enable_query_log
|
||||||
@ -69,10 +88,10 @@ connection con1;
|
|||||||
--send optimize table t1;
|
--send optimize table t1;
|
||||||
|
|
||||||
connection default;
|
connection default;
|
||||||
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);
|
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');
|
||||||
|
|
||||||
connection con2;
|
connection con2;
|
||||||
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);
|
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');
|
||||||
|
|
||||||
connection con3;
|
connection con3;
|
||||||
--send DELETE FROM t1 where a between 1 and 100;
|
--send DELETE FROM t1 where a between 1 and 100;
|
||||||
@ -103,6 +122,7 @@ disconnect con3;
|
|||||||
disconnect con4;
|
disconnect con4;
|
||||||
|
|
||||||
optimize table t1;
|
optimize table t1;
|
||||||
|
check table t1 extended;
|
||||||
|
|
||||||
select count(*) from t1;
|
select count(*) from t1;
|
||||||
select count(*) from t1 force index (second);
|
select count(*) from t1 force index (second);
|
||||||
|
@ -77,22 +77,85 @@ btr_corruption_report(
|
|||||||
/*
|
/*
|
||||||
Latching strategy of the InnoDB B-tree
|
Latching strategy of the InnoDB B-tree
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
A tree latch protects all non-leaf nodes of the tree. Each node of a tree
|
|
||||||
also has a latch of its own.
|
|
||||||
|
|
||||||
A B-tree operation normally first acquires an S-latch on the tree. It
|
Acquisition of node pointer page latches is protected by the index->lock latch.
|
||||||
searches down the tree and releases the tree latch when it has the
|
|
||||||
leaf node latch. To save CPU time we do not acquire any latch on
|
|
||||||
non-leaf nodes of the tree during a search, those pages are only bufferfixed.
|
|
||||||
|
|
||||||
If an operation needs to restructure the tree, it acquires an X-latch on
|
Before MariaDB 10.2.2, all node pointer pages were protected by index->lock
|
||||||
the tree before searching to a leaf node. If it needs, for example, to
|
either in S (shared) or X (exclusive) mode and block->lock was not acquired on
|
||||||
split a leaf,
|
node pointer pages.
|
||||||
(1) InnoDB decides the split point in the leaf,
|
|
||||||
(2) allocates a new page,
|
After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect
|
||||||
(3) inserts the appropriate node pointer to the first non-leaf level,
|
node pointer pages and obtainment of node pointer page latches is protected by
|
||||||
(4) releases the tree X-latch,
|
index->lock.
|
||||||
(5) and then moves records from the leaf to the new allocated page.
|
|
||||||
|
(0) Definition: B-tree level.
|
||||||
|
|
||||||
|
(0.1) The leaf pages of the B-tree are at level 0.
|
||||||
|
|
||||||
|
(0.2) The parent of a page at level L has level L+1. (The level of the
|
||||||
|
root page is equal to the tree height.)
|
||||||
|
|
||||||
|
(0.3) The B-tree lock (index->lock) is the parent of the root page and
|
||||||
|
has a level = tree height + 1.
|
||||||
|
|
||||||
|
Index->lock has 3 possible locking modes:
|
||||||
|
|
||||||
|
(1) S-latch:
|
||||||
|
|
||||||
|
(1.1) All latches for pages must be obtained in descending order of tree level.
|
||||||
|
|
||||||
|
(1.2) Before obtaining the first node pointer page latch at a given B-tree
|
||||||
|
level, parent latch must be held (at level +1 ).
|
||||||
|
|
||||||
|
(1.3) If a node pointer page is already latched at the same level
|
||||||
|
we can only obtain a latch on its right sibling page at the same level.
|
||||||
|
|
||||||
|
(1.4) Release of the node pointer page latches must be done in
|
||||||
|
child-to-parent order. (Prevents deadlocks when index->lock is obtained
|
||||||
|
in SX mode).
|
||||||
|
|
||||||
|
(1.4.1) Level L node pointer page latch can be released only when
|
||||||
|
no latches at child levels, i.e. level < L, are held.
|
||||||
|
|
||||||
|
(1.4.2) All latches from node pointer pages must be released so
|
||||||
|
that no latches are obtained between.
|
||||||
|
|
||||||
|
(1.5) [implied by (1.1), (1.2)] Root page latch must be first node pointer
|
||||||
|
latch obtained.
|
||||||
|
|
||||||
|
(2) SX-latch:
|
||||||
|
|
||||||
|
In this case rules (1.2) and (1.3) from S-latch case are relaxed and
|
||||||
|
merged into (2.2) and rule (1.4) is removed. Thus, latch acquisition
|
||||||
|
can be skipped at some tree levels and latches can be obtained in
|
||||||
|
a less restricted order.
|
||||||
|
|
||||||
|
(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending
|
||||||
|
order of tree level.
|
||||||
|
|
||||||
|
(2.2) When a node pointer latch at level L is obtained,
|
||||||
|
the left sibling page latch in the same level or some ancestor
|
||||||
|
page latch (at level > L) must be held.
|
||||||
|
|
||||||
|
(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can
|
||||||
|
be any node pointer page.
|
||||||
|
|
||||||
|
(3) X-latch:
|
||||||
|
|
||||||
|
Node pointer latches can be obtained in any order.
|
||||||
|
|
||||||
|
NOTE: New rules after MariaDB 10.2.2 do not affect the latching rules of leaf pages:
|
||||||
|
|
||||||
|
index->lock S-latch is needed in read for the node pointer traversal. When the leaf
|
||||||
|
level is reached, index->lock can be released (and with the MariaDB 10.2.2 changes, all
|
||||||
|
node pointer latches). Left to right index traversal at the leaf page level can be safely done
|
||||||
|
by obtaining right sibling leaf page latch and then releasing the old page latch.
|
||||||
|
|
||||||
|
Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock
|
||||||
|
S-latch.
|
||||||
|
|
||||||
|
B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page
|
||||||
|
allocations are protected by index->lock X-latch.
|
||||||
|
|
||||||
Node pointers
|
Node pointers
|
||||||
-------------
|
-------------
|
||||||
|
@ -564,7 +564,7 @@ btr_defragment_merge_pages(
|
|||||||
page_get_infimum_rec(from_page));
|
page_get_infimum_rec(from_page));
|
||||||
node_ptr = dict_index_build_node_ptr(
|
node_ptr = dict_index_build_node_ptr(
|
||||||
index, rec, page_get_page_no(from_page),
|
index, rec, page_get_page_no(from_page),
|
||||||
heap, level + 1);
|
heap, level);
|
||||||
btr_insert_on_non_leaf_level(0, index, level+1,
|
btr_insert_on_non_leaf_level(0, index, level+1,
|
||||||
node_ptr, mtr);
|
node_ptr, mtr);
|
||||||
}
|
}
|
||||||
@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
|
|||||||
|
|
||||||
now = ut_timer_now();
|
now = ut_timer_now();
|
||||||
mtr_start(&mtr);
|
mtr_start(&mtr);
|
||||||
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
|
|
||||||
cursor = btr_pcur_get_btr_cur(pcur);
|
cursor = btr_pcur_get_btr_cur(pcur);
|
||||||
index = btr_cur_get_index(cursor);
|
index = btr_cur_get_index(cursor);
|
||||||
first_block = btr_cur_get_block(cursor);
|
|
||||||
mtr.set_named_space(index->space);
|
mtr.set_named_space(index->space);
|
||||||
|
/* To follow the latching order defined in WL#6326, acquire index->lock X-latch.
|
||||||
|
This entitles us to acquire page latches in any order for the index. */
|
||||||
|
mtr_x_lock(&index->lock, &mtr);
|
||||||
|
/* This will acquire index->lock SX-latch, which per WL#6363 is allowed
|
||||||
|
when we are already holding the X-latch. */
|
||||||
|
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
|
||||||
|
first_block = btr_cur_get_block(cursor);
|
||||||
|
|
||||||
last_block = btr_defragment_n_pages(first_block, index,
|
last_block = btr_defragment_n_pages(first_block, index,
|
||||||
srv_defragment_n_pages,
|
srv_defragment_n_pages,
|
||||||
|
@ -15066,7 +15066,7 @@ ha_innobase::optimize(
|
|||||||
calls to OPTIMIZE, which is undesirable. */
|
calls to OPTIMIZE, which is undesirable. */
|
||||||
|
|
||||||
/* TODO: Defragment is disabled for now */
|
/* Defragment the table only when srv_defragment is enabled. */
|
||||||
if (0) {
|
if (srv_defragment) {
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
|
err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
|
||||||
|
@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
|
|||||||
2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
|
2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
|
||||||
|
|
||||||
LOCK COMPATIBILITY MATRIX
|
LOCK COMPATIBILITY MATRIX
|
||||||
S SX X
|
|
||||||
S + + -
|
| S|SX| X|
|
||||||
SX + - -
|
--+--+--+--+
|
||||||
X - - -
|
S| +| +| -|
|
||||||
|
--+--+--+--+
|
||||||
|
SX| +| -| -|
|
||||||
|
--+--+--+--+
|
||||||
|
X| -| -| -|
|
||||||
|
--+--+--+--+
|
||||||
|
|
||||||
The lock_word is always read and updated atomically and consistently, so that
|
The lock_word is always read and updated atomically and consistently, so that
|
||||||
it always represents the state of the lock, and the state of the lock changes
|
it always represents the state of the lock, and the state of the lock changes
|
||||||
|
Reference in New Issue
Block a user