diff --git a/mysql-test/main/partition_innodb_plugin.result b/mysql-test/main/partition_innodb_plugin.result index 6842ff04caa..39963e8c911 100644 --- a/mysql-test/main/partition_innodb_plugin.result +++ b/mysql-test/main/partition_innodb_plugin.result @@ -104,6 +104,10 @@ disconnect con2; connection default; SET @@global.innodb_strict_mode = @old_innodb_strict_mode; SET @@global.innodb_file_per_table = @old_innodb_file_per_table; +SET @save_detect= @@GLOBAL.innodb_deadlock_detect; +SET @save_report= @@GLOBAL.innodb_deadlock_report; +SET GLOBAL innodb_deadlock_detect=ON; +SET GLOBAL innodb_deadlock_report=BASIC; SET NAMES utf8; CREATE TABLE `t``\""e` (a INT, PRIMARY KEY (a)) ENGINE=InnoDB @@ -148,6 +152,8 @@ set sql_mode = 'ANSI_QUOTES'; SHOW ENGINE InnoDB STATUS; Type Name Status InnoDB index PRIMARY of table `test`.`t``\""e` /* Partition `p0``\""e`, Subpartition `sp0``\""e` */ +SET GLOBAL innodb_deadlock_detect= @save_detect; +SET GLOBAL innodb_deadlock_report= @save_report; set @@sql_mode = @old_sql_mode; connection con1; ROLLBACK; diff --git a/mysql-test/main/partition_innodb_plugin.test b/mysql-test/main/partition_innodb_plugin.test index 6d379b11756..bd96042c3f4 100644 --- a/mysql-test/main/partition_innodb_plugin.test +++ b/mysql-test/main/partition_innodb_plugin.test @@ -100,6 +100,10 @@ SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; # # Bug#32430 - show engine innodb status causes errors # +SET @save_detect= @@GLOBAL.innodb_deadlock_detect; +SET @save_report= @@GLOBAL.innodb_deadlock_report; +SET GLOBAL innodb_deadlock_detect=ON; +SET GLOBAL innodb_deadlock_report=BASIC; SET NAMES utf8; CREATE TABLE `t``\""e` (a INT, PRIMARY KEY (a)) ENGINE=InnoDB @@ -150,6 +154,8 @@ set @old_sql_mode = @@sql_mode; set sql_mode = 'ANSI_QUOTES'; --replace_regex /.*RECORD LOCKS space id [0-9]* page no [0-9]* n bits [0-9]* // / trx id .*// /.*index .* in // /trx table locks [0-9]* // /total table locks [0-9]* // SHOW ENGINE InnoDB STATUS; +SET 
GLOBAL innodb_deadlock_detect= @save_detect; +SET GLOBAL innodb_deadlock_report= @save_report; set @@sql_mode = @old_sql_mode; connection con1; REAP; diff --git a/mysql-test/suite/innodb/r/deadlock_detect,ON.rdiff b/mysql-test/suite/innodb/r/deadlock_detect,ON.rdiff new file mode 100644 index 00000000000..f3797baca86 --- /dev/null +++ b/mysql-test/suite/innodb/r/deadlock_detect,ON.rdiff @@ -0,0 +1,11 @@ +@@ -16,7 +16,10 @@ + connection default; + SELECT * FROM t1 WHERE id = 2 FOR UPDATE; + connection con2; ++connection con1; ++COMMIT; + disconnect con1; ++connection con2; + ROLLBACK; + disconnect con2; + connection default; diff --git a/mysql-test/suite/innodb/r/deadlock_detect.result b/mysql-test/suite/innodb/r/deadlock_detect.result index bb5ed8ca587..8131585aea2 100644 --- a/mysql-test/suite/innodb/r/deadlock_detect.result +++ b/mysql-test/suite/innodb/r/deadlock_detect.result @@ -16,9 +16,9 @@ SELECT * FROM t1 WHERE id = 1 FOR UPDATE; connection default; SELECT * FROM t1 WHERE id = 2 FOR UPDATE; connection con2; +disconnect con1; ROLLBACK; disconnect con2; -disconnect con1; connection default; ROLLBACK; DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/deadlock_detect.test b/mysql-test/suite/innodb/t/deadlock_detect.test index 06951d3a32f..e853790755a 100644 --- a/mysql-test/suite/innodb/t/deadlock_detect.test +++ b/mysql-test/suite/innodb/t/deadlock_detect.test @@ -39,14 +39,18 @@ connection con2; if (!$have_deadlock) { --error ER_LOCK_WAIT_TIMEOUT reap; +disconnect con1; } if ($have_deadlock) { ---error 0,ER_LOCK_DEADLOCK +connection con1; +COMMIT; +disconnect con1; +connection con2; +--error 0,ER_LOCK_DEADLOCK,ER_LOCK_WAIT_TIMEOUT reap; } ROLLBACK; disconnect con2; -disconnect con1; # # Note here that con1 is the older transaction as it diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff index f6a9305d373..23f82f513e3 100644 --- 
a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff @@ -9,7 +9,7 @@ VARIABLE_COMMENT Number of InnoDB Adaptive Hash Index Partitions (default 8) NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 512 -@@ -85,7 +85,7 @@ +@@ -73,7 +73,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -18,7 +18,7 @@ VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility); 1 => New style AUTOINC locking; 2 => No AUTOINC locking (unsafe for SBR) NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -157,10 +157,10 @@ +@@ -97,10 +97,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 134217728 VARIABLE_SCOPE GLOBAL @@ -31,7 +31,7 @@ NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -193,7 +193,7 @@ +@@ -133,7 +133,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 25 VARIABLE_SCOPE GLOBAL @@ -40,16 +40,7 @@ VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 -@@ -229,7 +229,7 @@ - SESSION_VALUE NULL - DEFAULT_VALUE 0 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT UNSIGNED -+VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Deprecated parameter with no effect. - NUMERIC_MIN_VALUE 0 - NUMERIC_MAX_VALUE 64 -@@ -289,7 +289,7 @@ +@@ -217,7 +217,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -58,7 +49,7 @@ VARIABLE_COMMENT A number between [0, 100] that tells how oftern buffer pool dump status in percentages should be printed. E.g. 10 means that buffer pool dump status is printed when every 10% of number of buffer pool pages are dumped. Default is 0 (only start and end status is printed). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -421,7 +421,7 @@ +@@ -337,7 +337,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 5 VARIABLE_SCOPE GLOBAL @@ -67,7 +58,7 @@ VARIABLE_COMMENT If the compression failure rate of a table is greater than this number more padding is added to the pages to reduce the failures. 
A value of zero implies no padding NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -445,7 +445,7 @@ +@@ -361,7 +361,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 50 VARIABLE_SCOPE GLOBAL @@ -76,7 +67,7 @@ VARIABLE_COMMENT Percentage of empty space on a data page that can be reserved to make the page compressible. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 75 -@@ -745,7 +745,7 @@ +@@ -661,7 +661,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 600 VARIABLE_SCOPE GLOBAL @@ -85,7 +76,7 @@ VARIABLE_COMMENT Maximum number of seconds that semaphore times out in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 4294967295 -@@ -805,7 +805,7 @@ +@@ -709,7 +709,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -94,7 +85,7 @@ VARIABLE_COMMENT Number of iterations over which the background flushing is averaged. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1000 -@@ -829,7 +829,7 @@ +@@ -733,7 +733,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -103,7 +94,7 @@ VARIABLE_COMMENT Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3. 
NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 3 -@@ -853,7 +853,7 @@ +@@ -757,7 +757,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -112,7 +103,7 @@ VARIABLE_COMMENT Set to 0 (don't flush neighbors from buffer pool), 1 (flush contiguous neighbors from buffer pool) or 2 (flush neighbors from buffer pool), when flushing a block NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -901,7 +901,7 @@ +@@ -805,7 +805,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -121,7 +112,7 @@ VARIABLE_COMMENT Helps to save your data in case the disk image of the database becomes corrupt. Value 5 can return bogus data, and 6 can permanently corrupt data. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6 -@@ -925,7 +925,7 @@ +@@ -829,7 +829,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 8000000 VARIABLE_SCOPE GLOBAL @@ -130,7 +121,7 @@ VARIABLE_COMMENT InnoDB Fulltext search cache size in bytes NUMERIC_MIN_VALUE 1600000 NUMERIC_MAX_VALUE 80000000 -@@ -961,7 +961,7 @@ +@@ -865,7 +865,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 84 VARIABLE_SCOPE GLOBAL @@ -139,7 +130,7 @@ VARIABLE_COMMENT InnoDB Fulltext search maximum token size in characters NUMERIC_MIN_VALUE 10 NUMERIC_MAX_VALUE 84 -@@ -973,7 +973,7 @@ +@@ -877,7 +877,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 3 VARIABLE_SCOPE GLOBAL @@ -148,7 +139,7 @@ VARIABLE_COMMENT InnoDB Fulltext search minimum token size in characters NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 16 -@@ -985,7 +985,7 @@ +@@ -889,7 +889,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000 VARIABLE_SCOPE GLOBAL @@ -157,7 +148,7 @@ VARIABLE_COMMENT InnoDB Fulltext search number of words to optimize for each optimize table call NUMERIC_MIN_VALUE 1000 NUMERIC_MAX_VALUE 10000 -@@ -997,10 +997,10 @@ +@@ -901,10 +901,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000000000 VARIABLE_SCOPE GLOBAL @@ -170,7 +161,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1021,7 +1021,7 @@ +@@ -925,7 +925,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -179,7 +170,7 @@ VARIABLE_COMMENT 
InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -1033,7 +1033,7 @@ +@@ -937,7 +937,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 640000000 VARIABLE_SCOPE GLOBAL @@ -188,7 +179,7 @@ VARIABLE_COMMENT Total memory allocated for InnoDB Fulltext Search cache NUMERIC_MIN_VALUE 32000000 NUMERIC_MAX_VALUE 1600000000 -@@ -1093,22 +1093,22 @@ +@@ -985,22 +985,22 @@ SESSION_VALUE NULL DEFAULT_VALUE 200 VARIABLE_SCOPE GLOBAL @@ -216,7 +207,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1153,7 +1153,7 @@ +@@ -1021,7 +1021,7 @@ SESSION_VALUE 50 DEFAULT_VALUE 50 VARIABLE_SCOPE SESSION @@ -225,7 +216,7 @@ VARIABLE_COMMENT Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 1073741824 -@@ -1165,10 +1165,10 @@ +@@ -1033,10 +1033,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 16777216 VARIABLE_SCOPE GLOBAL @@ -238,16 +229,7 @@ NUMERIC_BLOCK_SIZE 1024 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1213,7 +1213,7 @@ - SESSION_VALUE NULL - DEFAULT_VALUE 1 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT UNSIGNED -+VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Deprecated parameter with no effect. 
- NUMERIC_MIN_VALUE 1 - NUMERIC_MAX_VALUE 100 -@@ -1261,7 +1261,7 @@ +@@ -1081,7 +1081,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 8192 VARIABLE_SCOPE GLOBAL @@ -256,9 +238,9 @@ VARIABLE_COMMENT Redo log write ahead unit size to avoid read-on-write, it should match the OS cache block IO size NUMERIC_MIN_VALUE 512 NUMERIC_MAX_VALUE 16384 -@@ -1273,10 +1273,10 @@ +@@ -1093,10 +1093,10 @@ SESSION_VALUE NULL - DEFAULT_VALUE 100 + DEFAULT_VALUE 32 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED @@ -269,9 +251,9 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1285,10 +1285,10 @@ +@@ -1105,10 +1105,10 @@ SESSION_VALUE NULL - DEFAULT_VALUE 1024 + DEFAULT_VALUE 1536 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED @@ -282,7 +264,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1333,10 +1333,10 @@ +@@ -1153,10 +1153,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -295,7 +277,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1345,7 +1345,7 @@ +@@ -1165,7 +1165,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -304,7 +286,7 @@ VARIABLE_COMMENT Maximum delay of user threads in micro-seconds NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 10000000 -@@ -1465,10 +1465,10 @@ +@@ -1297,10 +1297,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -317,16 +299,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1489,7 +1489,7 @@ - SESSION_VALUE NULL - DEFAULT_VALUE 0 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT UNSIGNED -+VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Deprecated parameter with no effect. - NUMERIC_MIN_VALUE 0 - NUMERIC_MAX_VALUE 64 -@@ -1513,7 +1513,7 @@ +@@ -1333,7 +1333,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 16384 VARIABLE_SCOPE GLOBAL @@ -335,7 +308,7 @@ VARIABLE_COMMENT Page size to use for all InnoDB tablespaces. 
NUMERIC_MIN_VALUE 4096 NUMERIC_MAX_VALUE 65536 -@@ -1549,7 +1549,7 @@ +@@ -1369,7 +1369,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 300 VARIABLE_SCOPE GLOBAL @@ -344,7 +317,7 @@ VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 5000 -@@ -1561,7 +1561,7 @@ +@@ -1381,7 +1381,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -353,7 +326,7 @@ VARIABLE_COMMENT Dictates rate at which UNDO records are purged. Value N means purge rollback segment(s) on every Nth iteration of purge invocation NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -1597,7 +1597,7 @@ +@@ -1417,7 +1417,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 56 VARIABLE_SCOPE GLOBAL @@ -362,7 +335,7 @@ VARIABLE_COMMENT Number of pages that must be accessed sequentially for InnoDB to trigger a readahead. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -1705,7 +1705,7 @@ +@@ -1501,7 +1501,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1048576 VARIABLE_SCOPE GLOBAL @@ -371,16 +344,7 @@ VARIABLE_COMMENT Memory buffer size for index creation NUMERIC_MIN_VALUE 65536 NUMERIC_MAX_VALUE 67108864 -@@ -1873,7 +1873,7 @@ - SESSION_VALUE NULL - DEFAULT_VALUE 1 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT UNSIGNED -+VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Size of the mutex/lock wait array. - NUMERIC_MIN_VALUE 1 - NUMERIC_MAX_VALUE 1024 -@@ -1897,10 +1897,10 @@ +@@ -1669,10 +1669,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -393,16 +357,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -2005,7 +2005,7 @@ - SESSION_VALUE NULL - DEFAULT_VALUE 128 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT UNSIGNED -+VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Deprecated parameter with no effect. 
- NUMERIC_MIN_VALUE 0 - NUMERIC_MAX_VALUE 128 -@@ -2029,7 +2029,7 @@ +@@ -1765,7 +1765,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index f23970cd6e5..3fee1d1288b 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -417,6 +417,18 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT NONE +VARIABLE_NAME INNODB_DEADLOCK_REPORT +SESSION_VALUE NULL +DEFAULT_VALUE full +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE ENUM +VARIABLE_COMMENT How to report deadlocks (if innodb_deadlock_detect=ON). +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST off,basic,full +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_DEFAULT_ENCRYPTION_KEY_ID SESSION_VALUE 1 DEFAULT_VALUE 1 diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 1fbdaeb94e1..335a4303de6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -372,6 +372,26 @@ TYPELIB innodb_flush_method_typelib = { NULL }; +/** Names of allowed values of innodb_deadlock_report */ +static const char *innodb_deadlock_report_names[]= { + "off", /* Do not report any details of deadlocks */ + "basic", /* Report waiting transactions and lock requests */ + "full", /* Also report blocking locks */ + NullS +}; + +static_assert(Deadlock::REPORT_OFF == 0, "compatibility"); +static_assert(Deadlock::REPORT_BASIC == 1, "compatibility"); +static_assert(Deadlock::REPORT_FULL == 2, "compatibility"); + +/** Enumeration of innodb_deadlock_report */ +static TYPELIB innodb_deadlock_report_typelib = { + array_elements(innodb_deadlock_report_names) - 1, + "innodb_deadlock_report_typelib", + innodb_deadlock_report_names, + NULL +}; + /** Allowed values of innodb_change_buffering */ static const 
char* innodb_change_buffering_names[] = { "none", /* IBUF_USE_NONE */ @@ -4476,6 +4496,7 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) trx->mutex_unlock(); } } + lock_sys.deadlock_check(); mysql_mutex_unlock(&lock_sys.wait_mutex); } } @@ -18601,13 +18622,18 @@ static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors, " when flushing a block", NULL, NULL, 1, 0, 2, 0); -static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect, +static MYSQL_SYSVAR_BOOL(deadlock_detect, innodb_deadlock_detect, PLUGIN_VAR_NOCMDARG, "Enable/disable InnoDB deadlock detector (default ON)." " if set to OFF, deadlock detection is skipped," " and we rely on innodb_lock_wait_timeout in case of deadlock.", NULL, NULL, TRUE); +static MYSQL_SYSVAR_ENUM(deadlock_report, innodb_deadlock_report, + PLUGIN_VAR_RQCMDARG, + "How to report deadlocks (if innodb_deadlock_detect=ON).", + NULL, NULL, Deadlock::REPORT_FULL, &innodb_deadlock_report_typelib); + static MYSQL_SYSVAR_UINT(fill_factor, innobase_fill_factor, PLUGIN_VAR_RQCMDARG, "Percentage of B-tree page filled during bulk insert", @@ -19190,6 +19216,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(force_load_corrupted), MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(deadlock_detect), + MYSQL_SYSVAR(deadlock_report), MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(log_buffer_size), MYSQL_SYSVAR(log_file_size), diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index f81d7c65b84..b927f68d98f 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -43,7 +43,15 @@ Created 5/7/1996 Heikki Tuuri class ReadView; /** The value of innodb_deadlock_detect */ -extern my_bool innobase_deadlock_detect; +extern my_bool innodb_deadlock_detect; +/** The value of innodb_deadlock_report */ +extern ulong innodb_deadlock_report; + +namespace Deadlock +{ + /** The allowed values of innodb_deadlock_report */ + enum report 
{ REPORT_OFF, REPORT_BASIC, REPORT_FULL }; +} /*********************************************************************//** Gets the heap_no of the smallest user record on a page. @@ -704,22 +712,21 @@ public: hash_table prdt_hash; /** page locks for SPATIAL INDEX */ hash_table prdt_page_hash; - /** number of deadlocks detected; protected by mutex */ - ulint deadlocks; /** mutex covering lock waits; @see trx_lock_t::wait_lock */ MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t wait_mutex; private: - /** Pending number of lock waits; protected by wait_mutex */ - ulint wait_pending; /** Cumulative number of lock waits; protected by wait_mutex */ ulint wait_count; + /** Pending number of lock waits; protected by wait_mutex */ + uint32_t wait_pending; /** Cumulative wait time; protected by wait_mutex */ - ulint wait_time; + uint32_t wait_time; /** Longest wait time; protected by wait_mutex */ - ulint wait_time_max; - + uint32_t wait_time_max; public: + /** number of deadlocks detected; protected by wait_mutex */ + ulint deadlocks; /** Constructor. @@ -821,6 +828,10 @@ public: void close(); + /** Check for deadlocks */ + static void deadlock_check(); + + /** Note that a record lock wait started */ inline void wait_start(); @@ -940,8 +951,8 @@ UNIV_INLINE lock_t* lock_rec_create( /*============*/ -#ifdef WITH_WSREP lock_t* c_lock, /*!< conflicting lock */ +#ifdef WITH_WSREP que_thr_t* thr, /*!< thread owning trx */ #endif unsigned type_mode,/*!< in: lock mode and wait flag */ @@ -961,6 +972,7 @@ void lock_rec_discard(lock_sys_t::hash_table &lock_hash, lock_t *in_lock); /** Create a new record lock and inserts it to the lock queue, without checking for deadlocks or conflicts. +@param[in] c_lock conflicting lock, or NULL @param[in] type_mode lock mode and wait flag @param[in] page_id index page number @param[in] page R-tree index page, or NULL @@ -971,8 +983,8 @@ without checking for deadlocks or conflicts. 
@return created lock */ lock_t* lock_rec_create_low( + lock_t* c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ que_thr_t* thr, /*!< thread owning trx */ #endif unsigned type_mode, @@ -985,6 +997,7 @@ lock_rec_create_low( /** Enqueue a waiting request for a lock which cannot be granted immediately. Check for deadlocks. +@param[in] c_lock conflicting lock @param[in] type_mode the requested lock mode (LOCK_S or LOCK_X) possibly ORed with LOCK_GAP or LOCK_REC_NOT_GAP, ORed with @@ -1002,9 +1015,7 @@ Check for deadlocks. @retval DB_DEADLOCK if this transaction was chosen as the victim */ dberr_t lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ -#endif + lock_t* c_lock, unsigned type_mode, const page_id_t id, const page_t* page, diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic index ebe15db18f8..1b293eba0ff 100644 --- a/storage/innobase/include/lock0lock.ic +++ b/storage/innobase/include/lock0lock.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -60,8 +60,8 @@ UNIV_INLINE lock_t* lock_rec_create( /*============*/ -#ifdef WITH_WSREP lock_t* c_lock, /*!< conflicting lock */ +#ifdef WITH_WSREP que_thr_t* thr, /*!< thread owning trx */ #endif unsigned type_mode,/*!< in: lock mode and wait flag */ @@ -76,8 +76,9 @@ lock_rec_create( { btr_assert_not_corrupted(block, index); return lock_rec_create_low( + c_lock, #ifdef WITH_WSREP - c_lock, thr, + thr, #endif type_mode, block->page.id(), block->frame, heap_no, index, trx, caller_owns_trx_mutex); diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h index 5599412a394..80e253c468f 100644 --- a/storage/innobase/include/lock0priv.h +++ b/storage/innobase/include/lock0priv.h @@ -416,14 +416,6 @@ lock_rec_get_prev( const lock_t* in_lock,/*!< in: record lock */ ulint heap_no);/*!< in: heap number of the record */ -/*********************************************************************//** -Cancels a waiting lock request and releases possible other transactions -waiting behind it. */ -void -lock_cancel_waiting_and_release( -/*============================*/ - lock_t* lock); /*!< in/out: waiting lock request */ - /*********************************************************************//** Checks if some transaction has an implicit x-lock on a record in a clustered index. diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 1028611e88e..3be2f80673f 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -425,15 +425,13 @@ struct trx_lock_t trx->mutex, by the thread that is executing the transaction. Set to nullptr when holding lock_sys.wait_mutex. 
*/ Atomic_relaxed wait_lock; + /** Transaction being waited for; protected by lock_sys.wait_mutex */ + trx_t *wait_trx; /** condition variable for !wait_lock; used with lock_sys.wait_mutex */ pthread_cond_t cond; /** lock wait start time, protected only by lock_sys.wait_mutex */ my_hrtime_t suspend_time; - /** DeadlockChecker::search() uses this to keep track of visited locks. - Protected by lock_sys.is_writer(). */ - uint64_t deadlock_mark; - #ifdef WITH_WSREP /** 2=high priority wsrep thread has marked this trx to abort; 1=another transaction chose this as a victim in deadlock resolution. */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 44b3f5dac9a..84b58907601 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -52,7 +52,9 @@ Created 5/7/1996 Heikki Tuuri #endif /* WITH_WSREP */ /** The value of innodb_deadlock_detect */ -my_bool innobase_deadlock_detect; +my_bool innodb_deadlock_detect; +/** The value of innodb_deadlock_report */ +ulong innodb_deadlock_report; #ifdef HAVE_REPLICATION extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); @@ -253,197 +255,56 @@ static void lock_table_print(FILE* file, const lock_t* lock); @param[in,out] mtr mini-transaction for accessing the record */ static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr); -/** Deadlock checker. */ -class DeadlockChecker { -public: - /** Check if a joining lock request results in a deadlock. - If a deadlock is found, we will resolve the deadlock by - choosing a victim transaction to be rolled it back. - We will attempt to resolve all deadlocks. +namespace Deadlock +{ + /** Whether to_check may be nonempty */ + static Atomic_relaxed to_be_checked; + /** Transactions to check for deadlock. Protected by lock_sys.wait_mutex. 
*/ + static std::set to_check; - @param[in,out] trx transaction requesting a lock + MY_ATTRIBUTE((nonnull, warn_unused_result)) + /** Check if a lock request results in a deadlock. + Resolve a deadlock by choosing a transaction that will be rolled back. + @param trx transaction requesting a lock + @return whether trx must report DB_DEADLOCK */ + static bool check_and_resolve(trx_t *trx); - @return whether the transaction was chosen as victim */ - static bool check_and_resolve(trx_t *trx); - -private: - /** Do a shallow copy. Default destructor OK. - @param trx the start transaction (start node) - @param wait_lock lock that a transaction wants - @param mark_start visited node counter - @param report_waiters whether to call thd_rpl_deadlock_check() */ - DeadlockChecker( - trx_t* trx, - const lock_t* wait_lock, - ib_uint64_t mark_start, - bool report_waiters) - : -#ifdef HAVE_REPLICATION - m_report_waiters(report_waiters), -#endif - m_start(trx), - m_wait_lock(wait_lock), - m_mark_start(mark_start) - { - } - - /** Check if the search is too deep. */ - bool is_too_deep() const { - return m_n_elems > 200 || m_cost > 1000000; - } - - /** Save current state. - @param lock lock to push on the stack. - @param heap_no the heap number to push on the stack. - @return false if stack is full. */ - bool push(const lock_t* lock, ulint heap_no) - { - ut_ad(lock->is_table() == (heap_no == ULINT_UNDEFINED)); - - /* Ensure that the stack is bounded. */ - if (m_n_elems >= UT_ARR_SIZE(s_states)) { - return(false); - } - - state_t& state = s_states[m_n_elems++]; - - state.m_lock = lock; - state.m_wait_lock = m_wait_lock; - state.m_heap_no =heap_no; - - return(true); - } - - /** Restore state. 
- @param[out] lock current lock - @param[out] heap_no current heap_no */ - void pop(const lock_t*& lock, ulint& heap_no) - { - ut_a(m_n_elems > 0); - - const state_t& state = s_states[--m_n_elems]; - - lock = state.m_lock; - heap_no = state.m_heap_no; - m_wait_lock = state.m_wait_lock; - } - - /** Check whether the node has been visited. - @param lock lock to check - @return true if the node has been visited */ - bool is_visited(const lock_t* lock) const - { - return(lock->trx->lock.deadlock_mark > m_mark_start); - } - - /** Get the next lock in the queue that is owned by a transaction - whose sub-tree has not already been searched. - Note: "next" here means PREV for table locks. - @param lock Lock in queue - @param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED - @return next lock or NULL if at end of queue */ - const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const; - - /** Get the first lock to search. The search starts from the current - wait_lock. What we are really interested in is an edge from the - current wait_lock's owning transaction to another transaction that has - a lock ahead in the queue. We skip locks where the owning transaction's - sub-tree has already been searched. - - Note: The record locks are traversed from the oldest lock to the - latest. For table locks we go from latest to oldest. - - For record locks, we first position the iterator on first lock on - the page and then reposition on the actual heap_no. This is required - due to the way the record lock has is implemented. - - @param[out] heap_no if rec lock, else ULINT_UNDEFINED. - - @return first lock or NULL */ - const lock_t* get_first_lock(ulint* heap_no) const; - - /** Notify that a deadlock has been detected and print the conflicting - transaction info. 
- @param lock lock causing deadlock */ - void notify(const lock_t* lock) const; - - /** @return the victim transaction that should be rolled back */ - trx_t *select_victim() const; - - /** Looks iteratively for a deadlock. Note: the joining transaction - may have been granted its lock by the deadlock checks. - @return the victim transaction - @return nullptr if no deadlock */ - inline trx_t *search(); - - /** Print transaction data to the deadlock file and possibly to stderr. - @param trx transaction - @param max_query_len max query length to print */ - static void print(const trx_t* trx, ulint max_query_len); - - /** rewind(3) the file used for storing the latest detected deadlock - and print a heading message to stderr if printing of all deadlocks to - stderr is enabled. */ - static void start_print(); - - /** Print lock data to the deadlock file and possibly to stderr. - @param lock record or table type lock */ - static void print(const lock_t* lock); - - /** Print a message to the deadlock file and possibly to stderr. - @param msg message to print */ - static void print(const char* msg); - - /** Print info about transaction that was rolled back. - @param trx transaction rolled back - @param lock lock trx wants */ - static void rollback_print(const trx_t* trx, const lock_t* lock); - -private: - /** DFS state information, used during deadlock checking. */ - struct state_t { - const lock_t* m_lock; /*!< Current lock */ - const lock_t* m_wait_lock; /*!< Waiting for lock */ - ulint m_heap_no; /*!< heap number if rec lock */ - }; - - /** Used in deadlock tracking. Protected by lock_sys.latch. */ - static ib_uint64_t s_lock_mark_counter; - - /** Calculation steps thus far. It is the count of the nodes visited. */ - ulint m_cost= 0; - -#ifdef HAVE_REPLICATION - /** Set if thd_rpl_deadlock_check() should be called for waits. 
*/ - const bool m_report_waiters; -#endif - - /** Joining transaction that is requesting a lock in an - incompatible mode */ - trx_t* const m_start; - - /** TRUE if search was too deep and was aborted */ - bool m_too_deep= false; - - /** Lock that trx wants */ - const lock_t* m_wait_lock; - - /** Value of lock_mark_count at the start of the deadlock check. */ - const ib_uint64_t m_mark_start; - - /** Number of states pushed onto the stack */ - size_t m_n_elems= 0; - - /** This is to avoid malloc/free calls. */ - static state_t s_states[4096]; + /** Quickly detect a deadlock using Brent's cycle detection algorithm. + @param trx transaction that is waiting for another transaction + @return a transaction that is part of a cycle + @retval nullptr if no cycle was found */ + inline trx_t *find_cycle(trx_t *trx) + { + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + trx_t *tortoise= trx, *hare= trx; + for (unsigned power= 1, l= 1; (hare= hare->lock.wait_trx) != nullptr; l++) + { + if (tortoise == hare) + { + ut_ad(l > 1); + lock_sys.deadlocks++; + /* Note: Normally, trx should be part of any deadlock cycle + that is found. However, if innodb_deadlock_detect=OFF had been + in effect in the past, it is possible that trx will be waiting + for a transaction that participates in a pre-existing deadlock + cycle. In that case, our victim will not be trx. */ + return hare; + } + if (l == power) + { + /* The maximum concurrent number of TRX_STATE_ACTIVE transactions + is TRX_RSEG_N_SLOTS * 128, or innodb_page_size / 16 * 128 + (default: 131,072, maximum: 524,288). + Our maximum possible number of iterations should be twice that. */ + power<<= 1; + l= 0; + tortoise= hare; + } + } + return nullptr; + } }; -/** Counter to mark visited nodes during deadlock search. */ -ib_uint64_t DeadlockChecker::s_lock_mark_counter = 0; - -/** The stack used for deadlock searches. */ -DeadlockChecker::state_t DeadlockChecker::s_states[4096]; - #ifdef UNIV_DEBUG /** Validate the transactional locks. 
*/ static void lock_validate(); @@ -613,6 +474,9 @@ void lock_sys_t::close() latch.destroy(); mysql_mutex_destroy(&wait_mutex); + Deadlock::to_check.clear(); + Deadlock::to_be_checked= false; + m_initialised= false; } @@ -1249,22 +1113,35 @@ static void lock_reset_lock_and_trx_wait(lock_t *lock) trx_t *trx= lock->trx; ut_ad(lock->is_waiting()); ut_ad(!trx->lock.wait_lock || trx->lock.wait_lock == lock); + if (trx_t *wait_trx= trx->lock.wait_trx) + Deadlock::to_check.erase(wait_trx); trx->lock.wait_lock= nullptr; + trx->lock.wait_trx= nullptr; lock->type_mode&= ~LOCK_WAIT; } #ifdef WITH_WSREP /** Set the wait status of a lock. @param[in,out] lock lock that will be waited for -@param[in,out] trx transaction that will wait for the lock */ -static void lock_set_lock_and_trx_wait(lock_t *lock, trx_t *trx) +@param[in,out] trx transaction that will wait for the lock +@param[in] ctrx holder of the conflicting lock */ +static void lock_set_lock_and_trx_wait(lock_t *lock, trx_t *trx, trx_t *ctrx) { ut_ad(lock); ut_ad(lock->trx == trx); - ut_ad(!trx->lock.wait_lock || trx->lock.wait_lock != lock); - ut_ad(!trx->lock.wait_lock || (*trx->lock.wait_lock).trx == trx); lock_sys.assert_locked(*lock); ut_ad(trx->mutex_is_owner()); + if (trx->lock.wait_trx) + { + ut_ad(trx->lock.wait_trx == ctrx); + ut_ad(trx->lock.wait_lock != lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } + else + { + trx->lock.wait_trx= ctrx; + ut_ad(!trx->lock.wait_lock); + } trx->lock.wait_lock= lock; lock->type_mode|= LOCK_WAIT; @@ -1316,7 +1193,7 @@ static bool lock_rec_create_wsrep(lock_t *c_lock, que_thr_t *thr, lock_t *lock, if (UNIV_UNLIKELY(wsrep_debug)) wsrep_print_wait_locks(c_lock); - lock_set_lock_and_trx_wait(lock, trx); + lock_set_lock_and_trx_wait(lock, trx, ctrx); UT_LIST_ADD_LAST(trx->lock.trx_locks, lock); trx->lock.wait_thr= thr; @@ -1381,6 +1258,7 @@ static void lock_table_create_wsrep(lock_t *c_lock, lock_t *lock, dict_table_t * /** Create a new record lock and inserts it to the lock 
queue, without checking for deadlocks or conflicts. +@param[in] c_lock conflicting lock @param[in] type_mode lock mode and wait flag @param[in] page_id index page number @param[in] page R-tree index page, or NULL @@ -1391,8 +1269,8 @@ without checking for deadlocks or conflicts. @return created lock */ lock_t* lock_rec_create_low( + lock_t* c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ que_thr_t* thr, /*!< thread owning trx */ #endif unsigned type_mode, @@ -1489,8 +1367,15 @@ lock_rec_create_low( HASH_INSERT(lock_t, hash, lock_hash, page_id.fold(), lock); if (type_mode & LOCK_WAIT) { - ut_ad(!trx->lock.wait_lock - || (*trx->lock.wait_lock).trx == trx); + if (trx->lock.wait_trx) { + ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx); + ut_ad(trx->lock.wait_lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } else { + ut_ad(c_lock); + trx->lock.wait_trx = c_lock->trx; + ut_ad(!trx->lock.wait_lock); + } trx->lock.wait_lock = lock; } UT_LIST_ADD_LAST(trx->lock.trx_locks, lock); @@ -1505,6 +1390,7 @@ lock_rec_create_low( /** Enqueue a waiting request for a lock which cannot be granted immediately. Check for deadlocks. +@param[in] c_lock conflicting lock @param[in] type_mode the requested lock mode (LOCK_S or LOCK_X) possibly ORed with LOCK_GAP or LOCK_REC_NOT_GAP, ORed with @@ -1522,9 +1408,7 @@ Check for deadlocks. @retval DB_DEADLOCK if this transaction was chosen as the victim */ dberr_t lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ -#endif + lock_t* c_lock, unsigned type_mode, const page_id_t id, const page_t* page, @@ -1563,8 +1447,9 @@ lock_rec_enqueue_waiting( /* Enqueue the lock request that will wait to be granted, note that we already own the trx mutex. 
*/ lock_t* lock = lock_rec_create_low( + c_lock, #ifdef WITH_WSREP - c_lock, thr, + thr, #endif type_mode | LOCK_WAIT, id, page, heap_no, index, trx, true); @@ -1724,12 +1609,16 @@ lock_rec_add_to_queue( } create: - lock_rec_create_low( + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock_rec_create_low(nullptr, #ifdef WITH_WSREP - NULL, NULL, + nullptr, #endif - type_mode, id, page, heap_no, index, trx, - caller_owns_trx_mutex); + type_mode, id, page, heap_no, index, trx, + caller_owns_trx_mutex); } /*********************************************************************//** @@ -1791,23 +1680,16 @@ lock_rec_lock( /* Do nothing if the trx already has a strong enough lock on rec */ if (!lock_rec_has_expl(mode, id, heap_no, trx)) { - if ( -#ifdef WITH_WSREP - lock_t *c_lock= -#endif - lock_rec_other_has_conflicting(mode, id, heap_no, trx)) - { + if (lock_t *c_lock= lock_rec_other_has_conflicting(mode, id, + heap_no, trx)) /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the - record, we have to wait. */ - err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif /* WITH_WSREP */ - mode, id, block->frame, heap_no, index, thr, nullptr); - } + record, we have to wait. + */ + err= lock_rec_enqueue_waiting(c_lock, mode, id, block->frame, heap_no, + index, thr, nullptr); else if (!impl) { /* Set the requested lock on the record. */ @@ -1839,11 +1721,11 @@ lock_rec_lock( Note that we don't own the trx mutex. 
*/ if (!impl) - lock_rec_create_low( + lock_rec_create_low(nullptr, #ifdef WITH_WSREP - nullptr, nullptr, + nullptr, #endif - mode, id, block->frame, heap_no, index, trx, false); + mode, id, block->frame, heap_no, index, trx, false); return DB_SUCCESS_LOCKED_REC; } @@ -1903,7 +1785,8 @@ void lock_sys_t::wait_resume(THD *thd, my_hrtime_t start, my_hrtime_t now) wait_pending--; if (now.val >= start.val) { - const ulint diff_time= static_cast<ulint>((now.val - start.val) / 1000); + const uint32_t diff_time= + static_cast<uint32_t>((now.val - start.val) / 1000); wait_time+= diff_time; if (diff_time > wait_time_max) @@ -1913,6 +1796,67 @@ void lock_sys_t::wait_resume(THD *thd, my_hrtime_t start, my_hrtime_t now) } } +#ifdef HAVE_REPLICATION +ATTRIBUTE_NOINLINE MY_ATTRIBUTE((nonnull)) +/** Report lock waits to parallel replication. +@param trx transaction that may be waiting for a lock +The awaited lock is read from trx->lock.wait_lock under lock_sys.wait_mutex. */ +static void lock_wait_rpl_report(trx_t *trx) +{ + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + ut_ad(trx->state == TRX_STATE_ACTIVE); + THD *const thd= trx->mysql_thd; + ut_ad(thd); + const lock_t *wait_lock= trx->lock.wait_lock; + if (!wait_lock) + return; + ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); + if (!lock_sys.wr_lock_try()) + { + mysql_mutex_unlock(&lock_sys.wait_mutex); + lock_sys.wr_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&lock_sys.wait_mutex); + } + wait_lock= trx->lock.wait_lock; + if (!wait_lock) + { +func_exit: + lock_sys.wr_unlock(); + return; + } + ut_ad(wait_lock->is_waiting()); + ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); + + if (wait_lock->is_table()) + { + if (lock_t *lock= + UT_LIST_GET_FIRST(wait_lock->un_member.tab_lock.table->locks)) + { + do + if (!(lock->type_mode & LOCK_AUTO_INC) && + lock->trx->mysql_thd != thd) + thd_rpl_deadlock_check(thd, lock->trx->mysql_thd); + while ((lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock))); + } + } + else if (lock_t *lock= + (wait_lock->type_mode & LOCK_PREDICATE + ? 
lock_sys.prdt_hash : lock_sys.rec_hash). + get_first(wait_lock->un_member.rec_lock.page_id)) + { + const ulint heap_no= lock_rec_find_set_bit(wait_lock); + if (!lock_rec_get_nth_bit(lock, heap_no)) + lock= lock_rec_get_next(heap_no, lock); + do + if (lock->trx->mysql_thd != thd) + thd_rpl_deadlock_check(thd, lock->trx->mysql_thd); + while ((lock= lock_rec_get_next(heap_no, lock))); + } + + goto func_exit; +} +#endif /* HAVE_REPLICATION */ + /** Wait for a lock to be released. @retval DB_DEADLOCK if this transaction was chosen as the deadlock victim @retval DB_INTERRUPTED if the execution was interrupted by the user @@ -1970,27 +1914,42 @@ dberr_t lock_wait(que_thr_t *thr) can only be initiated by the current thread which owns the transaction. */ if (const lock_t *wait_lock= trx->lock.wait_lock) { - static_assert(THD_WAIT_TABLE_LOCK != 0, "compatibility"); - static_assert(THD_WAIT_ROW_LOCK != 0, "compatibility"); - wait_for= wait_lock->is_table() ? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK; + const auto type_mode= wait_lock->type_mode; mysql_mutex_unlock(&lock_sys.wait_mutex); if (had_dict_lock) /* Release foreign key check latch */ row_mysql_unfreeze_data_dictionary(trx); + static_assert(THD_WAIT_TABLE_LOCK != 0, "compatibility"); + static_assert(THD_WAIT_ROW_LOCK != 0, "compatibility"); + wait_for= type_mode & LOCK_TABLE ? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK; + +#ifdef HAVE_REPLICATION + /* Even though lock_wait_rpl_report() has nothing to do with + deadlock detection, it was always disabled by innodb_deadlock_detect=OFF. + We will keep it that way, because unfortunately + thd_need_wait_reports() will hold even if parallel (or any) replication + is not being used. We want to allow the user to skip + lock_wait_rpl_report(). 
*/ + const bool rpl= !(type_mode & LOCK_AUTO_INC) && trx->mysql_thd && + innodb_deadlock_detect && thd_need_wait_reports(trx->mysql_thd); +#endif + timespec abstime; set_timespec_time_nsec(abstime, suspend_time.val * 1000); abstime.MY_tv_sec+= innodb_lock_wait_timeout; thd_wait_begin(trx->mysql_thd, wait_for); - const bool deadlock= DeadlockChecker::check_and_resolve(trx); - if (deadlock) + if (Deadlock::check_and_resolve(trx)) + { trx->error_state= DB_DEADLOCK; - - mysql_mutex_lock(&lock_sys.wait_mutex); - - if (deadlock) goto end_wait; + } + +#ifdef HAVE_REPLICATION + if (rpl) + lock_wait_rpl_report(trx); +#endif while (trx->lock.wait_lock) { @@ -2054,12 +2013,14 @@ end_wait: trx->mutex_unlock(); } } + lock_sys.deadlock_check(); mysql_mutex_unlock(&lock_sys.wait_mutex); } return trx->error_state; } + /** Resume a lock wait */ static void lock_wait_end(trx_t *trx) { @@ -2169,19 +2130,28 @@ static void lock_rec_dequeue_from_page(lock_t *in_lock, bool owns_wait_mutex) if (!lock->is_waiting()) { continue; } + if (!owns_wait_mutex) { mysql_mutex_lock(&lock_sys.wait_mutex); acquired = owns_wait_mutex = true; } - const lock_t* c = lock_rec_has_to_wait_in_queue(lock); - if (!c) { + + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + + if (const lock_t* c = lock_rec_has_to_wait_in_queue(lock)) { + trx_t* c_trx = c->trx; + IF_WSREP(wsrep_assert_no_bf_bf_wait(c, lock, c_trx),); + lock->trx->lock.wait_trx = c_trx; + if (c_trx->lock.wait_trx + && innodb_deadlock_detect + && Deadlock::to_check.emplace(c_trx).second) { + Deadlock::to_be_checked = true; + } + } else { /* Grant the lock */ ut_ad(lock->trx != in_lock->trx); lock_grant(lock); -#ifdef WITH_WSREP - } else { - wsrep_assert_no_bf_bf_wait(c, lock, c->trx); -#endif /* WITH_WSREP */ } } @@ -3271,11 +3241,8 @@ lock_table_create( in dictionary cache */ unsigned type_mode,/*!< in: lock mode possibly ORed with LOCK_WAIT */ - trx_t* trx /*!< in: trx */ -#ifdef WITH_WSREP - , lock_t* c_lock = NULL /*!< in: 
conflicting lock */ -#endif - ) + trx_t* trx, /*!< in: trx */ + lock_t* c_lock) /*!< in: conflicting lock */ { lock_t* lock; @@ -3331,8 +3298,15 @@ allocated: ut_list_append(table->locks, lock, TableLockGetNode()); if (type_mode & LOCK_WAIT) { - ut_ad(!trx->lock.wait_lock - || (*trx->lock.wait_lock).trx == trx); + if (trx->lock.wait_trx) { + ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx); + ut_ad(trx->lock.wait_lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } else { + ut_ad(c_lock); + trx->lock.wait_trx = c_lock->trx; + ut_ad(!trx->lock.wait_lock); + } trx->lock.wait_lock = lock; } @@ -3490,11 +3464,8 @@ lock_table_enqueue_waiting( unsigned mode, /*!< in: lock mode this transaction is requesting */ dict_table_t* table, /*!< in/out: table */ - que_thr_t* thr /*!< in: query thread */ -#ifdef WITH_WSREP - , lock_t* c_lock /*!< in: conflicting lock or NULL */ -#endif -) + que_thr_t* thr, /*!< in: query thread */ + lock_t* c_lock) /*!< in: conflicting lock or NULL */ { lock_sys.assert_locked(*table); ut_ad(!srv_read_only_mode); @@ -3520,11 +3491,7 @@ lock_table_enqueue_waiting( #endif /* WITH_WSREP */ /* Enqueue the lock request that will wait to be granted */ - lock_table_create(table, mode | LOCK_WAIT, trx -#ifdef WITH_WSREP - , c_lock -#endif - ); + lock_table_create(table, mode | LOCK_WAIT, trx, c_lock); trx->lock.wait_thr = thr; trx->lock.was_chosen_as_deadlock_victim @@ -3656,13 +3623,9 @@ lock_table( trx->mutex_lock(); if (wait_for) { - err = lock_table_enqueue_waiting(mode, table, thr -#ifdef WITH_WSREP - , wait_for -#endif - ); + err = lock_table_enqueue_waiting(mode, table, thr, wait_for); } else { - lock_table_create(table, mode, trx); + lock_table_create(table, mode, trx, wait_for); } #ifdef WITH_WSREP @@ -3696,7 +3659,7 @@ void lock_table_resurrect(dict_table_t *table, trx_t *trx, lock_mode mode) ut_ad(!lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)); trx->mutex_lock(); - lock_table_create(table, mode, trx); + 
lock_table_create(table, mode, trx, nullptr); } trx->mutex_unlock(); } @@ -3755,7 +3718,6 @@ static void lock_table_dequeue(lock_t *in_lock, bool owns_wait_mutex) for (/* No op */; lock != NULL; lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { - if (!lock->is_waiting()) { continue; } @@ -3765,7 +3727,18 @@ static void lock_table_dequeue(lock_t *in_lock, bool owns_wait_mutex) acquired = owns_wait_mutex = true; } - if (!lock_table_has_to_wait_in_queue(lock)) { + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + + if (const lock_t* c = lock_table_has_to_wait_in_queue(lock)) { + trx_t* c_trx = c->trx; + lock->trx->lock.wait_trx = c_trx; + if (c_trx->lock.wait_trx + && innodb_deadlock_detect + && Deadlock::to_check.emplace(c_trx).second) { + Deadlock::to_be_checked = true; + } + } else { /* Grant the lock */ ut_ad(in_lock->trx != lock->trx); in_lock->trx->mutex_unlock(); @@ -3799,6 +3772,8 @@ void lock_table_x_unlock(dict_table_t *table, trx_t *trx) lock_sys.rd_lock(SRW_LOCK_CALL); table->lock_mutex_lock(); trx->mutex_lock(); + /* There is no need to check for new deadlocks, because only one + exclusive lock may exist on an object at a time. 
*/ lock_table_dequeue(lock, false); trx->mutex_unlock(); table->lock_mutex_unlock(); @@ -3924,18 +3899,21 @@ released: if (!lock->is_waiting()) { continue; } - const lock_t* c = lock_rec_has_to_wait_in_queue(lock); - if (!c) { - /* Grant the lock */ - ut_ad(trx != lock->trx); - mysql_mutex_lock(&lock_sys.wait_mutex); - lock_grant(lock); - mysql_mutex_unlock(&lock_sys.wait_mutex); + mysql_mutex_lock(&lock_sys.wait_mutex); + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + + if (const lock_t* c = lock_rec_has_to_wait_in_queue(lock)) { #ifdef WITH_WSREP - } else { wsrep_assert_no_bf_bf_wait(c, lock, c->trx); #endif /* WITH_WSREP */ + lock->trx->lock.wait_trx = c->trx; + } else { + /* Grant the lock */ + ut_ad(trx != lock->trx); + lock_grant(lock); } + mysql_mutex_unlock(&lock_sys.wait_mutex); } } @@ -4061,6 +4039,12 @@ restart: goto restart; released: + if (UNIV_UNLIKELY(Deadlock::to_be_checked)) + { + mysql_mutex_lock(&lock_sys.wait_mutex); + lock_sys.deadlock_check(); + mysql_mutex_unlock(&lock_sys.wait_mutex); + } trx->lock.was_chosen_as_deadlock_victim= false; trx->lock.n_rec_locks= 0; } @@ -4936,16 +4920,12 @@ lock_rec_insert_check_and_lock( on the successor, which produced an unnecessary deadlock. 
*/ const unsigned type_mode= LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION; - lock_t *c_lock= lock_rec_other_has_conflicting(type_mode, id, - heap_no, trx); - if (c_lock) + if(lock_t *c_lock= lock_rec_other_has_conflicting(type_mode, id, + heap_no, trx)) { trx->mutex_lock(); - err= lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif - type_mode, id, block->frame, heap_no, index, thr, nullptr); + err= lock_rec_enqueue_waiting(c_lock, type_mode, id, block->frame, + heap_no, index, thr, nullptr); trx->mutex_unlock(); } } @@ -5599,11 +5579,7 @@ void lock_cancel_waiting_and_release(lock_t *lock) ut_ad(trx->state == TRX_STATE_ACTIVE); if (!lock->is_table()) - { - trx->mutex_unlock(); lock_rec_dequeue_from_page(lock, true); - trx->mutex_lock(); - } else { if (trx->autoinc_locks) @@ -5687,8 +5663,9 @@ lock_trx_handle_wait( trx->mutex_lock(); err= lock_trx_handle_wait_low(trx); } - mysql_mutex_unlock(&lock_sys.wait_mutex); trx->mutex_unlock(); + lock_sys.deadlock_check(); + mysql_mutex_unlock(&lock_sys.wait_mutex); return err; } @@ -5843,472 +5820,260 @@ bool lock_trx_has_expl_x_lock(const trx_t &trx, const dict_table_t &table, } #endif /* UNIV_DEBUG */ -/** rewind(3) the file used for storing the latest detected deadlock and -print a heading message to stderr if printing of all deadlocks to stderr -is enabled. */ -void -DeadlockChecker::start_print() +namespace Deadlock { - lock_sys.assert_locked(); - - rewind(lock_latest_err_file); - ut_print_timestamp(lock_latest_err_file); - - if (srv_print_all_deadlocks) { - ib::info() << "Transactions deadlock detected, dumping" - " detailed information."; - } -} - -/** Print a message to the deadlock file and possibly to stderr. -@param msg message to print */ -void -DeadlockChecker::print(const char* msg) -{ - fputs(msg, lock_latest_err_file); - - if (srv_print_all_deadlocks) { - ib::info() << msg; - } -} - -/** Print transaction data to the deadlock file and possibly to stderr. 
-@param trx transaction -@param max_query_len max query length to print */ -void -DeadlockChecker::print(const trx_t* trx, ulint max_query_len) -{ - lock_sys.assert_locked(); - - ulint n_rec_locks = trx->lock.n_rec_locks; - ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); - ulint heap_size = mem_heap_get_size(trx->lock.lock_heap); - - trx_print_low(lock_latest_err_file, trx, max_query_len, - n_rec_locks, n_trx_locks, heap_size); - - if (srv_print_all_deadlocks) { - trx_print_low(stderr, trx, max_query_len, - n_rec_locks, n_trx_locks, heap_size); - } -} - -/** Print lock data to the deadlock file and possibly to stderr. -@param lock record or table type lock */ -void -DeadlockChecker::print(const lock_t* lock) -{ - lock_sys.assert_locked(); - - if (!lock->is_table()) { - mtr_t mtr; - lock_rec_print(lock_latest_err_file, lock, mtr); - - if (srv_print_all_deadlocks) { - lock_rec_print(stderr, lock, mtr); - } - } else { - lock_table_print(lock_latest_err_file, lock); - - if (srv_print_all_deadlocks) { - lock_table_print(stderr, lock); - } - } -} - -/** Get the next lock in the queue that is owned by a transaction whose -sub-tree has not already been searched. -Note: "next" here means PREV for table locks. - -@param lock Lock in queue -@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED - -@return next lock or NULL if at end of queue */ -const lock_t* -DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const -{ - lock_sys.assert_locked(); - - do { - if (!lock->is_table()) { - ut_ad(heap_no != ULINT_UNDEFINED); - lock = lock_rec_get_next_const(heap_no, lock); - } else { - ut_ad(heap_no == ULINT_UNDEFINED); - - lock = UT_LIST_GET_NEXT( - un_member.tab_lock.locks, lock); - } - - } while (lock != NULL && is_visited(lock)); - - ut_ad(!lock || lock->is_table() == m_wait_lock->is_table()); - - return(lock); -} - -/** Get the first lock to search. The search starts from the current -wait_lock. 
What we are really interested in is an edge from the -current wait_lock's owning transaction to another transaction that has -a lock ahead in the queue. We skip locks where the owning transaction's -sub-tree has already been searched. - -Note: The record locks are traversed from the oldest lock to the -latest. For table locks we go from latest to oldest. - -For record locks, we first position the "iterator" on the first lock on -the page and then reposition on the actual heap_no. This is required -due to the way the record lock has is implemented. - -@param[out] heap_no if rec lock, else ULINT_UNDEFINED. -@return first lock or NULL */ -const lock_t* -DeadlockChecker::get_first_lock(ulint* heap_no) const -{ - lock_sys.assert_locked(); - - const lock_t* lock = m_wait_lock; - - if (!lock->is_table()) { - /* We are only interested in records that match the heap_no. */ - *heap_no = lock_rec_find_set_bit(lock); - - ut_ad(*heap_no <= 0xffff); - ut_ad(*heap_no != ULINT_UNDEFINED); - - /* Find the locks on the page. */ - lock = (lock->type_mode & LOCK_PREDICATE - ? lock_sys.prdt_hash - : lock_sys.rec_hash) - .get_first(lock->un_member.rec_lock.page_id); - - /* Position on the first lock on the physical record.*/ - if (!lock_rec_get_nth_bit(lock, *heap_no)) { - lock = lock_rec_get_next_const(*heap_no, lock); - } - - ut_a(!lock->is_waiting()); - } else { - /* Table locks don't care about the heap_no. */ - *heap_no = ULINT_UNDEFINED; - dict_table_t* table = lock->un_member.tab_lock.table; - lock = UT_LIST_GET_FIRST(table->locks); - } - - /* Must find at least two locks, otherwise there cannot be a - waiting lock, secondly the first lock cannot be the wait_lock. */ - ut_a(lock != NULL); - ut_a(lock != m_wait_lock); - - /* Check that the lock type doesn't change. */ - ut_ad(lock->is_table() == m_wait_lock->is_table()); - - return(lock); -} - -/** Notify that a deadlock has been detected and print the conflicting -transaction info. 
-@param lock lock causing deadlock */ -void -DeadlockChecker::notify(const lock_t* lock) const -{ - lock_sys.assert_locked(); - - start_print(); - - print("\n*** (1) TRANSACTION:\n"); - - print(m_wait_lock->trx, 3000); - - print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n"); - - print(m_wait_lock); - - print("*** (2) TRANSACTION:\n"); - - print(lock->trx, 3000); - - print("*** (2) HOLDS THE LOCK(S):\n"); - - print(lock); - - /* It is possible that the joining transaction was granted its - lock when we rolled back some other waiting transaction. */ - - if (auto wait_lock= m_start->lock.wait_lock) { - print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n"); - - print(wait_lock); - } - - DBUG_PRINT("ib_lock", ("deadlock detected")); -} - -/** Compare the "weight" (or size) of two transactions. Transactions that -have edited non-transactional tables are considered heavier than ones -that have not. -@return whether a is heavier than b */ -inline bool trx_weight_ge(const trx_t *a, const trx_t *b) -{ - bool a_notrans= a->mysql_thd && thd_has_edited_nontrans_tables(a->mysql_thd); - bool b_notrans= b->mysql_thd && thd_has_edited_nontrans_tables(b->mysql_thd); - return a_notrans != b_notrans ? a_notrans : TRX_WEIGHT(a) >= TRX_WEIGHT(b); -} - -/** @return the victim transaction that should be rolled back */ -trx_t *DeadlockChecker::select_victim() const -{ - lock_sys.assert_locked(); - trx_t *lock_trx= m_wait_lock->trx; - ut_ad(m_start->lock.wait_lock); - ut_ad(lock_trx != m_start); - - if (trx_weight_ge(lock_trx, m_start)) + /** rewind(3) the file used for storing the latest detected deadlock and + print a heading message to stderr if printing of all deadlocks to stderr + is enabled. 
*/ + static void start_print() { -#ifdef WITH_WSREP - if (m_start->is_wsrep() && wsrep_thd_is_BF(m_start->mysql_thd, FALSE)) - return lock_trx; -#endif /* WITH_WSREP */ - /* The joining transaction is 'smaller', choose it as the victim */ - return m_start; + lock_sys.assert_locked(); + + rewind(lock_latest_err_file); + ut_print_timestamp(lock_latest_err_file); + + if (srv_print_all_deadlocks) + ib::info() << "Transactions deadlock detected," + " dumping detailed information."; } + /** Print a message to the deadlock file and possibly to stderr. + @param msg message to print */ + static void print(const char *msg) + { + fputs(msg, lock_latest_err_file); + if (srv_print_all_deadlocks) + ib::info() << msg; + } + + /** Print transaction data to the deadlock file and possibly to stderr. + @param trx transaction */ + static void print(const trx_t &trx) + { + lock_sys.assert_locked(); + + ulint n_rec_locks= trx.lock.n_rec_locks; + ulint n_trx_locks= UT_LIST_GET_LEN(trx.lock.trx_locks); + ulint heap_size= mem_heap_get_size(trx.lock.lock_heap); + + trx_print_low(lock_latest_err_file, &trx, 3000, + n_rec_locks, n_trx_locks, heap_size); + + if (srv_print_all_deadlocks) + trx_print_low(stderr, &trx, 3000, n_rec_locks, n_trx_locks, heap_size); + } + + /** Print lock data to the deadlock file and possibly to stderr. + @param lock record or table type lock */ + static void print(const lock_t &lock) + { + lock_sys.assert_locked(); + + if (!lock.is_table()) + { + mtr_t mtr; + lock_rec_print(lock_latest_err_file, &lock, mtr); + + if (srv_print_all_deadlocks) + lock_rec_print(stderr, &lock, mtr); + } + else + { + lock_table_print(lock_latest_err_file, &lock); + + if (srv_print_all_deadlocks) + lock_table_print(stderr, &lock); + } + } + + ATTRIBUTE_COLD + /** Report a deadlock (cycle in the waits-for graph). 
+ @param trx transaction waiting for a lock in this thread + @param current_trx whether trx belongs to the current thread + @return the transaction to be rolled back (unless one was committed already) + @retval nullptr if no deadlock */ + static trx_t *report(trx_t *const trx, bool current_trx= true) + { + mysql_mutex_unlock(&lock_sys.wait_mutex); + + /* Normally, trx should be a direct part of the deadlock + cycle. However, if innodb_deadlock_detect had been OFF in the + past, our trx may be waiting for a lock that is held by a + participant of a pre-existing deadlock, without being part of the + deadlock itself. That is, the path to the deadlock may be P-shaped + instead of O-shaped, with trx being at the foot of the P. + + We will process the entire path leading to a cycle, and we will + choose the victim (to be aborted) among the cycle. */ + + static const char rollback_msg[]= "*** WE ROLL BACK TRANSACTION (%u)\n"; + char buf[9 + sizeof rollback_msg]; + + /* trx is owned by this thread; we can safely call these without + holding any mutex */ + const undo_no_t trx_weight= TRX_WEIGHT(trx) | + (trx->mysql_thd && thd_has_edited_nontrans_tables(trx->mysql_thd) + ? 1ULL << 63 : 0); + trx_t *victim= nullptr; + undo_no_t victim_weight= ~0ULL; + unsigned victim_pos= 0, trx_pos= 0; + { + unsigned l= 1; /* 1-based position in the cycle, matching the (%u) numbering of the report below */ + LockMutexGuard g{SRW_LOCK_CALL}; + mysql_mutex_lock(&lock_sys.wait_mutex); + /* Now that we are holding lock_sys.wait_mutex again, check + whether a cycle still exists. */ + trx_t *cycle= find_cycle(trx); + if (!cycle) + return nullptr; /* One of the transactions was already aborted. */ + for (trx_t *next= cycle;; l++) + { + next= next->lock.wait_trx; + const undo_no_t next_weight= TRX_WEIGHT(next) | + (next->mysql_thd && thd_has_edited_nontrans_tables(next->mysql_thd) + ? 
1ULL << 63 : 0); + if (next_weight < victim_weight) + { + victim_weight= next_weight; + victim= next; + victim_pos= l; + } + if (next == trx) /* remember the position of trx itself; stays 0 if trx is outside the cycle */ + trx_pos= l; + if (next == cycle) + break; + } + + if (trx_pos && trx_weight == victim_weight) + { + victim= trx; + victim_pos= trx_pos; + } + + /* Finally, display the deadlock */ + switch (const auto r= static_cast<enum report>(innodb_deadlock_report)) { + case REPORT_OFF: + break; + case REPORT_BASIC: + case REPORT_FULL: + start_print(); + l= 0; + + for (trx_t *next= cycle;;) + { + next= next->lock.wait_trx; + ut_ad(next); + ut_ad(next->state == TRX_STATE_ACTIVE); + const lock_t *wait_lock= next->lock.wait_lock; + ut_ad(wait_lock); + snprintf(buf, sizeof buf, "\n*** (%u) TRANSACTION:\n", ++l); + print(buf); + print(*next); + print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n"); + print(*wait_lock); + if (r == REPORT_BASIC); + else if (wait_lock->is_table()) + { + if (const lock_t *lock= + UT_LIST_GET_FIRST(wait_lock->un_member.tab_lock.table->locks)) + { + ut_ad(!lock->is_waiting()); + print("*** CONFLICTING WITH:\n"); + do + print(*lock); + while ((lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) && + !lock->is_waiting()); + } + else + ut_ad("no conflicting table lock found" == 0); + } + else if (const lock_t *lock= + (wait_lock->type_mode & LOCK_PREDICATE + ? lock_sys.prdt_hash : lock_sys.rec_hash). 
+ get_first(wait_lock->un_member.rec_lock.page_id)) + { + const ulint heap_no= lock_rec_find_set_bit(wait_lock); + if (!lock_rec_get_nth_bit(lock, heap_no)) + lock= lock_rec_get_next_const(heap_no, lock); + ut_ad(!lock->is_waiting()); + print("*** CONFLICTING WITH:\n"); + do + print(*lock); + while ((lock= lock_rec_get_next_const(heap_no, lock)) && + !lock->is_waiting()); + } + else + ut_ad("no conflicting record lock found" == 0); + + if (next == cycle) + break; + } + snprintf(buf, sizeof buf, rollback_msg, victim_pos); + print(buf); + } + + ut_ad(victim->state == TRX_STATE_ACTIVE); + + if (!current_trx || victim != trx) + { + victim->mutex_lock(); + victim->lock.was_chosen_as_deadlock_victim= true; + lock_cancel_waiting_and_release(victim->lock.wait_lock); + victim->mutex_unlock(); + } #ifdef WITH_WSREP - if (lock_trx->is_wsrep() && wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE)) - return m_start; -#endif /* WITH_WSREP */ + if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd)) + wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd); +#endif + } - return lock_trx; + return victim; + } } -/** Looks iteratively for a deadlock. Note: the joining transaction may -have been granted its lock by the deadlock checks. -@return the victim transaction -@return nullptr if no deadlock */ -inline trx_t* DeadlockChecker::search() -{ - lock_sys.assert_locked(); - ut_ad(!m_start->mutex_is_owner()); - check_trx_state(m_wait_lock->trx); - ut_ad(m_mark_start <= s_lock_mark_counter); - - /* Look at the locks ahead of wait_lock in the lock queue. */ - ulint heap_no; - const lock_t* lock = get_first_lock(&heap_no); - - for (;;) { - /* We should never visit the same sub-tree more than once. */ - ut_ad(lock == NULL || !is_visited(lock)); - - while (m_n_elems > 0 && lock == NULL) { - - /* Restore previous search state. 
*/ - - pop(lock, heap_no); - - lock = get_next_lock(lock, heap_no); - } - - if (lock == NULL) { - break; - } - - if (lock == m_wait_lock) { - - /* We can mark this subtree as searched */ - ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start); - - lock->trx->lock.deadlock_mark = ++s_lock_mark_counter; - - /* We are not prepared for an overflow. This 64-bit - counter should never wrap around. At 10^9 increments - per second, it would take 10^3 years of uptime. */ - - ut_ad(s_lock_mark_counter > 0); - - /* Backtrack */ - lock = NULL; - continue; - } - - if (!lock_has_to_wait(m_wait_lock, lock)) { - /* No conflict, next lock */ - lock = get_next_lock(lock, heap_no); - continue; - } - - trx_t *trx = lock->trx; - - if (trx == m_start) { - /* Found a cycle. */ - notify(lock); - return select_victim(); - } - - if (is_too_deep()) { - /* Search too deep to continue. */ - m_too_deep = true; - return m_start; - } - -#ifdef HAVE_REPLICATION - /* We do not need to report autoinc locks to the upper - layer. These locks are released before commit, so they - can not cause deadlocks with binlog-fixed commit - order. */ - if (m_report_waiters && !(lock->type_mode & LOCK_AUTO_INC)) { - thd_rpl_deadlock_check(m_start->mysql_thd, - trx->mysql_thd); - } -#endif /* HAVE_REPLICATION */ - - if (!trx->lock.wait_thr) { - } else if (lock_t* wait_lock = trx->lock.wait_lock) { - /* Note: Because we are not holding - lock_sys.wait_mutex here, trx->lock.wait_lock - could be reset soon after we read it. */ - - /* Another trx ahead has requested a lock in an - incompatible mode, and is itself waiting for a lock. */ - - ++m_cost; - - if (!push(lock, heap_no)) { - m_too_deep = true; - return m_start; - } - - m_wait_lock = wait_lock; - - lock = get_first_lock(&heap_no); - - if (!is_visited(lock)) { - continue; - } - } - - lock = get_next_lock(lock, heap_no); - } - - ut_a(lock == NULL && m_n_elems == 0); - - /* No deadlock found. 
*/ - return(0); -} - -/** Print info about transaction that was rolled back. -@param trx transaction rolled back -@param lock lock trx wants */ -void -DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock) -{ - lock_sys.assert_locked(); - - /* If the lock search exceeds the max step - or the max depth, the current trx will be - the victim. Print its information. */ - start_print(); - - print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE" - " WAITS-FOR GRAPH, WE WILL ROLL BACK" - " FOLLOWING TRANSACTION \n\n" - "*** TRANSACTION:\n"); - - print(trx, 3000); - - print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n"); - - print(lock); -} - -/** Check if a joining lock request results in a deadlock. -If a deadlock is found, we will resolve the deadlock by -choosing a victim transaction to be rolled it back. -We will attempt to resolve all deadlocks. - -@param[in,out] trx transaction requesting a lock - -@return whether the transaction was chosen as victim */ -inline bool DeadlockChecker::check_and_resolve(trx_t *trx) +/** Check if a lock request results in a deadlock. +Resolve a deadlock by choosing a transaction that will be rolled back. +@param trx transaction requesting a lock +@return whether trx must report DB_DEADLOCK (lock_sys.wait_mutex is acquired here and is still held on return) */ +static bool Deadlock::check_and_resolve(trx_t *trx) { ut_ad(!trx->mutex_is_owner()); ut_ad(trx->state == TRX_STATE_ACTIVE); ut_ad(!srv_read_only_mode); - if (!innobase_deadlock_detect || !trx->lock.wait_lock) + mysql_mutex_lock(&lock_sys.wait_mutex); + + if (!innodb_deadlock_detect) return false; -#ifdef HAVE_REPLICATION - const bool report_waiters= thd_need_wait_reports(trx->mysql_thd); -#else - const bool report_waiters= false; -#endif + if (UNIV_LIKELY(!find_cycle(trx))) + return trx->lock.was_chosen_as_deadlock_victim; - trx_t *victim_trx; - - for (;;) - { - LockMutexGuard g{SRW_LOCK_CALL}; // FIXME: only lock_sys.wait_mutex? 
- lock_t *lock= trx->lock.wait_lock; - - if (!lock) - return false; - - if (trx->lock.was_chosen_as_deadlock_victim) - return true; - - DeadlockChecker checker(trx, lock, s_lock_mark_counter, report_waiters && - !(lock->type_mode & LOCK_AUTO_INC)); - victim_trx= checker.search(); - - /* Search too deep, we rollback the joining transaction only if it - is possible to rollback. Otherwise we rollback the transaction - that is holding the lock that the joining transaction wants. */ - if (checker.is_too_deep()) - { - ut_ad(trx == checker.m_start); - ut_ad(trx == victim_trx); - rollback_print(trx, lock); -self_victim: - print("*** WE ROLL BACK TRANSACTION (2)\n"); - } - else if (victim_trx == trx) - goto self_victim; - else if (!victim_trx) - return false; - else - { - mysql_mutex_lock(&lock_sys.wait_mutex); - victim_trx->mutex_lock(); - const auto state= victim_trx->state; - if (state != TRX_STATE_ACTIVE) - { - ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY); - mysql_mutex_unlock(&lock_sys.wait_mutex); - victim_trx->mutex_unlock(); - return false; - } - print("*** WE ROLL BACK TRANSACTION (1)\n"); - victim_trx->lock.was_chosen_as_deadlock_victim - IF_WSREP(.fetch_or(1),= true); - lock_cancel_waiting_and_release(victim_trx->lock.wait_lock); - mysql_mutex_unlock(&lock_sys.wait_mutex); - victim_trx->mutex_unlock(); - } - lock_sys.deadlocks++; - break; - } - - ut_ad(trx->state == TRX_STATE_ACTIVE); - -#ifdef WITH_WSREP - if (victim_trx->is_wsrep() && wsrep_thd_is_SR(victim_trx->mysql_thd)) - wsrep_handle_SR_rollback(trx->mysql_thd, victim_trx->mysql_thd); -#endif - - return victim_trx == trx; + return report(trx) == trx || trx->lock.was_chosen_as_deadlock_victim; } + +/** Check for deadlocks */ +void lock_sys_t::deadlock_check() +{ + lock_sys.assert_unlocked(); + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + if (Deadlock::to_be_checked) + { + while (!Deadlock::to_check.empty()) + { + auto i= Deadlock::to_check.begin(); + trx_t *trx= *i; + Deadlock::to_check.erase(i); + if 
(Deadlock::find_cycle(trx)) + Deadlock::report(trx, false); + } + Deadlock::to_be_checked= false; + } + ut_ad(Deadlock::to_check.empty()); +} + + /*************************************************************//** Updates the lock table when a page is split and merged to two pages. */ diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc index cb6dde162ce..adc0d2c2dae 100644 --- a/storage/innobase/lock/lock0prdt.cc +++ b/storage/innobase/lock/lock0prdt.cc @@ -463,12 +463,16 @@ lock_prdt_add_to_queue( } create: - lock_t* lock = lock_rec_create( + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock_t* lock = lock_rec_create(nullptr, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + nullptr, #endif - type_mode, block, PRDT_HEAPNO, index, trx, - caller_owns_trx_mutex); + type_mode, block, PRDT_HEAPNO, index, + trx, caller_owns_trx_mutex); if (lock->type_mode & LOCK_PREDICATE) { lock_prdt_set_prdt(lock, prdt); @@ -529,11 +533,8 @@ lock_prdt_insert_check_and_lock( trx->mutex_lock(); /* Allocate MBR on the lock heap */ lock_init_prdt_from_mbr(prdt, mbr, 0, trx->lock.lock_heap); - err= lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif - mode, id, block->frame, PRDT_HEAPNO, index, thr, prdt); + err= lock_rec_enqueue_waiting(c_lock, mode, id, block->frame, + PRDT_HEAPNO, index, thr, prdt); trx->mutex_unlock(); } } @@ -732,8 +733,9 @@ lock_prdt_lock( if (lock == NULL) { lock = lock_rec_create( + NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif prdt_mode, block, PRDT_HEAPNO, index, trx, FALSE); @@ -762,10 +764,7 @@ lock_prdt_lock( if (wait_for != NULL) { err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - NULL, /* FIXME: replicate - SPATIAL INDEX locks */ -#endif + 
wait_for, prdt_mode, id, block->frame, PRDT_HEAPNO, index, thr, prdt); @@ -835,8 +834,9 @@ lock_place_prdt_page_lock( if (lock == NULL) { lock = lock_rec_create_low( + NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif mode, page_id, NULL, PRDT_HEAPNO, index, trx, FALSE);