diff --git a/mysql-test/suite/innodb/disabled.def b/mysql-test/suite/innodb/disabled.def index 9a92e99df2e..c435de278b9 100644 --- a/mysql-test/suite/innodb/disabled.def +++ b/mysql-test/suite/innodb/disabled.def @@ -10,14 +10,5 @@ # ############################################################################## -innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment -innodb.defrag_mdl-9155 : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_concurrent : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_stats : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_stats_many_tables : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defragment : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defragment_small : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_binlog : MDEV-11336 Fix and enable innodb_defragment innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2 create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails diff --git a/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result b/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result index ff32bf694cb..d10727b95b4 100644 --- a/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result +++ b/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result @@ -3,7 +3,15 @@ select @@global.innodb_stats_persistent; @@global.innodb_stats_persistent 0 set global innodb_defragment_stats_accuracy = 80; -CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB; +CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, +b VARCHAR(256), +c INT, +g GEOMETRY NOT NULL, +t VARCHAR(256), +KEY second(a, b), +KEY third(c), +SPATIAL gk(g), +FULLTEXT INDEX fti(t)) ENGINE=INNODB; connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK; connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK; connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK; @@ -40,9 +48,9 @@ count(stat_value) > 0 connection con1; optimize table t1;; connection default; -INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);; +INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');; connection con2; -INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);; +INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');; connection con3; DELETE FROM t1 where a between 1 and 100;; connection con4; @@ -59,6 +67,9 @@ disconnect con4; optimize table t1; Table Op Msg_type Msg_text test.t1 optimize status OK +check table t1 extended; +Table Op Msg_type Msg_text +test.t1 check status OK select count(*) from t1; count(*) 15723 diff --git a/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test b/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test index f596fab2a15..bbcd72f1a3a 100644 --- a/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test +++ b/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test @@ -16,7 +16,26 @@ select @@global.innodb_stats_persistent; set global innodb_defragment_stats_accuracy = 80; # Create table. -CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB; +# +# TODO: Currently we do not defragment spatial indexes, +# because doing it properly would require +# appropriate logic around the SSN (split +# sequence number). +# +# Also do not defragment auxiliary tables related to FULLTEXT INDEX. +# +# Both types added to this test to make sure they do not cause +# problems. +# +CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, +b VARCHAR(256), +c INT, +g GEOMETRY NOT NULL, +t VARCHAR(256), +KEY second(a, b), +KEY third(c), +SPATIAL gk(g), +FULLTEXT INDEX fti(t)) ENGINE=INNODB; connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK); connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK); @@ -36,7 +55,7 @@ let $i = $data_size; while ($i) { eval - INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i); + INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i, Point($i,$i), 'This is a test message.'); dec $i; } --enable_query_log @@ -69,10 +88,10 @@ connection con1; --send optimize table t1; connection default; ---send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000); +--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.'); connection con2; ---send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000); +--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.'); connection con3; --send DELETE FROM t1 where a between 1 and 100; @@ -103,6 +122,7 @@ disconnect con3; disconnect con4; optimize table t1; +check table t1 extended; select count(*) from t1; select count(*) from t1 force index (second); diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index bf4a123c2ee..e638af8a217 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -77,22 +77,85 @@ btr_corruption_report( /* Latching strategy of the InnoDB B-tree -------------------------------------- -A tree latch protects all non-leaf nodes of the tree. Each node of a tree -also has a latch of its own. -A B-tree operation normally first acquires an S-latch on the tree. It -searches down the tree and releases the tree latch when it has the -leaf node latch. To save CPU time we do not acquire any latch on -non-leaf nodes of the tree during a search, those pages are only bufferfixed. +Node pointer page latches acquisition is protected by index->lock latch. -If an operation needs to restructure the tree, it acquires an X-latch on -the tree before searching to a leaf node. If it needs, for example, to -split a leaf, -(1) InnoDB decides the split point in the leaf, -(2) allocates a new page, -(3) inserts the appropriate node pointer to the first non-leaf level, -(4) releases the tree X-latch, -(5) and then moves records from the leaf to the new allocated page. +Before MariaDB 10.2.2, all node pointer pages were protected by index->lock +either in S (shared) or X (exclusive) mode and block->lock was not acquired on +node pointer pages. + +After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect +node pointer pages and obtaiment of node pointer page latches is protected by +index->lock. + +(0) Definition: B-tree level. + +(0.1) The leaf pages of the B-tree are at level 0. + +(0.2) The parent of a page at level L has level L+1. (The level of the +root page is equal to the tree height.) + +(0.3) The B-tree lock (index->lock) is the parent of the root page and +has a level = tree height + 1. + +Index->lock has 3 possible locking modes: + +(1) S-latch: + +(1.1) All latches for pages must be obtained in descending order of tree level. + +(1.2) Before obtaining the first node pointer page latch at a given B-tree +level, parent latch must be held (at level +1 ). + +(1.3) If a node pointer page is already latched at the same level +we can only obtain latch to its right sibling page latch at the same level. + +(1.4) Release of the node pointer page latches must be done in +child-to-parent order. (Prevents deadlocks when obtained index->lock +in SX mode). + +(1.4.1) Level L node pointer page latch can be released only when +no latches at children level i.e. level < L are hold. + +(1.4.2) All latches from node pointer pages must be released so +that no latches are obtained between. + +(1.5) [implied by (1.1), (1.2)] Root page latch must be first node pointer +latch obtained. + +(2) SX-latch: + +In this case rules (1.2) and (1.3) from S-latch case are relaxed and +merged into (2.2) and rule (1.4) is removed. Thus, latch acquisition +can be skipped at some tree levels and latches can be obtained in +a less restricted order. + +(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending +order of tree level. + +(2.2) When a node pointer latch at level L is obtained, +the left sibling page latch in the same level or some ancestor +page latch (at level > L) must be hold. + +(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can +be any node pointer page. + +(3) X-latch: + +Node pointer latches can be obtained in any order. + +NOTE: New rules after MariaDB 10.2.2 does not affect the latching rules of leaf pages: + +index->lock S-latch is needed in read for the node pointer traversal. When the leaf +level is reached, index-lock can be released (and with the MariaDB 10.2.2 changes, all +node pointer latches). Left to right index travelsal in leaf page level can be safely done +by obtaining right sibling leaf page latch and then releasing the old page latch. + +Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock +S-latch. + +B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page +allocations are protected by index->lock X-latch. Node pointers ------------- diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 335b4fc220d..70444ca1830 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -564,7 +564,7 @@ btr_defragment_merge_pages( page_get_infimum_rec(from_page)); node_ptr = dict_index_build_node_ptr( index, rec, page_get_page_no(from_page), - heap, level + 1); + heap, level); btr_insert_on_non_leaf_level(0, index, level+1, node_ptr, mtr); } @@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*) now = ut_timer_now(); mtr_start(&mtr); - btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); cursor = btr_pcur_get_btr_cur(pcur); index = btr_cur_get_index(cursor); - first_block = btr_cur_get_block(cursor); mtr.set_named_space(index->space); + /* To follow the latching order defined in WL#6326, acquire index->lock X-latch. + This entitles us to acquire page latches in any order for the index. */ + mtr_x_lock(&index->lock, &mtr); + /* This will acquire index->lock SX-latch, which per WL#6363 is allowed + when we are already holding the X-latch. */ + btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); + first_block = btr_cur_get_block(cursor); last_block = btr_defragment_n_pages(first_block, index, srv_defragment_n_pages, diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 3374f46fb24..e7bf2b649f9 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -15066,7 +15066,7 @@ ha_innobase::optimize( calls to OPTIMIZE, which is undesirable. */ /* TODO: Defragment is disabled for now */ - if (0) { + if (srv_defragment) { int err; err = defragment_table(m_prebuilt->table->name.m_name, NULL, false); diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc index 6322b14335f..37b64910713 100644 --- a/storage/innobase/sync/sync0rw.cc +++ b/storage/innobase/sync/sync0rw.cc @@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR): 2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR) LOCK COMPATIBILITY MATRIX - S SX X - S + + - - SX + - - - X - - - + + | S|SX| X| + --+--+--+--+ + S| +| +| -| + --+--+--+--+ + SX| +| -| -| + --+--+--+--+ + X| -| -| -| + --+--+--+--+ The lock_word is always read and updated atomically and consistently, so that it always represents the state of the lock, and the state of the lock changes