From b35296911886143267ea471530e701169e5f7f56 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 21 Mar 2014 13:30:55 +0100 Subject: [PATCH] MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts Due to how gap locks work, two transactions could group commit together on the master, but get lock conflicts and then deadlock due to different thread scheduling order on slave. For now, remove these deadlocks by running the parallel slave in READ COMMITTED mode. And let InnoDB/XtraDB allow statement-based binlogging for the parallel slave in READ COMMITTED. We are also investigating a different solution long-term, which is based on relaxing the gap locks only between the transactions running in parallel for one slave, but not against possibly external transactions. --- include/mysql/plugin.h | 1 + include/mysql/plugin_audit.h.pp | 1 + include/mysql/plugin_auth.h.pp | 1 + include/mysql/plugin_ftparser.h.pp | 1 + mysql-test/suite/rpl/r/rpl_parallel.result | 42 +++++++++++- mysql-test/suite/rpl/t/rpl_parallel.test | 80 +++++++++++++++++++++- sql/rpl_parallel.cc | 34 +++++++++ sql/sql_class.cc | 6 ++ storage/innobase/handler/ha_innodb.cc | 7 +- storage/xtradb/handler/ha_innodb.cc | 7 +- 10 files changed, 174 insertions(+), 6 deletions(-) diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 8e8229d6630..ceb6ac93ff5 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -622,6 +622,7 @@ void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton); void thd_storage_lock_wait(MYSQL_THD thd, long long value); int thd_tx_isolation(const MYSQL_THD thd); int thd_tx_is_read_only(const MYSQL_THD thd); +int thd_rpl_is_parallel(const MYSQL_THD thd); /** Create a temporary file. diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp index dff4dac460d..98fd089570d 100644 --- a/include/mysql/plugin_audit.h.pp +++ b/include/mysql/plugin_audit.h.pp @@ -303,6 +303,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton); void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); int thd_tx_is_read_only(const void* thd); +int thd_rpl_is_parallel(const void* thd); int mysql_tmpfile(const char *prefix); unsigned long thd_get_thread_id(const void* thd); void thd_get_xid(const void* thd, MYSQL_XID *xid); diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp index b6a02fe8be9..6d52c5be7f0 100644 --- a/include/mysql/plugin_auth.h.pp +++ b/include/mysql/plugin_auth.h.pp @@ -303,6 +303,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton); void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); int thd_tx_is_read_only(const void* thd); +int thd_rpl_is_parallel(const void* thd); int mysql_tmpfile(const char *prefix); unsigned long thd_get_thread_id(const void* thd); void thd_get_xid(const void* thd, MYSQL_XID *xid); diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp index 4db34f1a380..cb3e7cafc97 100644 --- a/include/mysql/plugin_ftparser.h.pp +++ b/include/mysql/plugin_ftparser.h.pp @@ -256,6 +256,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton); void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); int thd_tx_is_read_only(const void* thd); +int thd_rpl_is_parallel(const void* thd); int mysql_tmpfile(const char *prefix); unsigned long thd_get_thread_id(const void* thd); void thd_get_xid(const void* thd, MYSQL_XID *xid); diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 3784ccb4dec..7308e13eef4 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -761,11 +761,51 @@ a b 110 1 111 2 112 3 +***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts *** +include/stop_slave.inc +CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +UPDATE t4 SET b=NULL WHERE a=6; +SET debug_sync='now WAIT_FOR master_queued1'; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +DELETE FROM t4 WHERE b <= 3; +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; +SET debug_sync='RESET'; +include/start_slave.inc +include/stop_slave.inc +SELECT * FROM t4 ORDER BY a; +a b +1 NULL +3 NULL +4 4 +5 NULL +6 NULL +DELETE FROM t4; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +INSERT INTO t4 VALUES (7, NULL); +SET debug_sync='now WAIT_FOR master_queued1'; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +DELETE FROM t4 WHERE b <= 3; +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; +SET debug_sync='RESET'; +include/start_slave.inc +SELECT * FROM t4 ORDER BY a; +a b +1 NULL +3 NULL +4 4 +5 NULL +6 6 +7 NULL include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; DROP function foo; -DROP TABLE t1,t2,t3; +DROP TABLE t1,t2,t3,t4; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 6bf339edb3a..2742a2089f8 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -1172,6 +1172,84 @@ START SLAVE SQL_THREAD; SELECT * FROM t3 WHERE a >= 110 ORDER BY a; +--echo ***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts *** +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); + +# Create a group commit with UPDATE and DELETE, in that order. +# The bug was that while the UPDATE's row lock does not block the DELETE, the +# DELETE's gap lock _does_ block the UPDATE. This could cause a deadlock +# on the slave. +--connection con1 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +send UPDATE t4 SET b=NULL WHERE a=6; +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued1'; + +--connection con2 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +send DELETE FROM t4 WHERE b <= 3; + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; + +--connection con1 +REAP; +--connection con2 +REAP; +SET debug_sync='RESET'; +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master +--source include/stop_slave.inc + +SELECT * FROM t4 ORDER BY a; + + +# Another example, this one with INSERT vs. DELETE +--connection server_1 +DELETE FROM t4; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); + +# Create a group commit with INSERT and DELETE, in that order. +# The bug was that while the INSERT's insert intention lock does not block +# the DELETE, the DELETE's gap lock _does_ block the INSERT. This could cause +# a deadlock on the slave. +--connection con1 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +send INSERT INTO t4 VALUES (7, NULL); +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued1'; + +--connection con2 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +send DELETE FROM t4 WHERE b <= 3; + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; + +--connection con1 +REAP; +--connection con2 +REAP; +SET debug_sync='RESET'; +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master + +SELECT * FROM t4 ORDER BY a; + + --connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; @@ -1180,7 +1258,7 @@ SET DEBUG_SYNC= 'RESET'; --connection server_1 DROP function foo; -DROP TABLE t1,t2,t3; +DROP TABLE t1,t2,t3,t4; SET DEBUG_SYNC= 'RESET'; --source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index af797d55da3..5bc22d7304e 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -242,6 +242,39 @@ handle_rpl_parallel_thread(void *arg) thd_proc_info(thd, "Waiting for work from main SQL threads"); thd->set_time(); thd->variables.lock_wait_timeout= LONG_TIMEOUT; + /* + For now, we need to run the replication parallel worker threads in + READ COMMITTED. This is needed because gap locks are not symmetric. + For example, a gap lock from a DELETE blocks an insert intention lock, + but not vice versa. So an INSERT followed by DELETE can group commit + on the master, but if we are unlucky with thread scheduling we can + then deadlock on the slave because the INSERT ends up waiting for a + gap lock from the DELETE (and the DELETE in turn waits for the INSERT + in wait_for_prior_commit()). See also MDEV-5914. + + It should be mostly safe to run in READ COMMITTED in the slave anyway. + The commit order is already fixed from on the master, so we do not + risk logging into the binlog in an incorrect order between worker + threads (one that would cause different results if executed on a + lower-level slave that uses this slave as a master). The only + potential problem is with transactions run in a different master + connection (using multi-source replication), or run directly on the + slave by an application; when using READ COMMITTED we are not + guaranteed serialisability of binlogged statements. + + In practice, this is unlikely to be an issue. In GTID mode, such + parallel transactions from multi-source or application must in any + case use a different replication domain, in which case binlog order + by definition must be independent between the different domain. Even + in non-GTID mode, normally one will assume that the external + transactions are not conflicting with those applied by the slave, so + that isolation level should make no difference. It would be rather + strange if the result of applying query events from one master would + depend on the timing and nature of other queries executed from + different multi-source connections or done directly on the slave by + an application. Still, something to be aware of. + */ + thd->variables.tx_isolation= ISO_READ_COMMITTED; mysql_mutex_lock(&rpt->LOCK_rpl_thread); rpt->thd= thd; @@ -306,6 +339,7 @@ handle_rpl_parallel_thread(void *arg) PSI_stage_info old_stage; uint64 wait_count; + thd->tx_isolation= (enum_tx_isolation)thd->variables.tx_isolation; in_event_group= true; /* If the standalone flag is set, then this event group consists of a diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8a4a7006c63..e85a7d74851 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -4234,6 +4234,12 @@ extern "C" int thd_slave_thread(const MYSQL_THD thd) return(thd->slave_thread); } +/* Returns true for a worker thread in parallel replication. */ +extern "C" int thd_rpl_is_parallel(const MYSQL_THD thd) +{ + return thd->rgi_slave && thd->rgi_slave->is_parallel_exec; +} + extern "C" int thd_non_transactional_update(const MYSQL_THD thd) { return(thd->transaction.all.modified_non_trans_table); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index fb481a17d55..3557c5066e8 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4223,11 +4223,14 @@ handler::Table_flags ha_innobase::table_flags() const /*============================*/ { + THD *thd = ha_thd(); /* Need to use tx_isolation here since table flags is (also) called before prebuilt is inited. */ - ulong const tx_isolation = thd_tx_isolation(ha_thd()); + ulong const tx_isolation = thd_tx_isolation(thd); - if (tx_isolation <= ISO_READ_COMMITTED) { + if (tx_isolation <= ISO_READ_COMMITTED && + !(tx_isolation == ISO_READ_COMMITTED && + thd_rpl_is_parallel(thd))) { return(int_table_flags); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index d26c315d2eb..a40463d3bd8 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -4680,11 +4680,14 @@ handler::Table_flags ha_innobase::table_flags() const /*============================*/ { + THD *thd = ha_thd(); /* Need to use tx_isolation here since table flags is (also) called before prebuilt is inited. */ - ulong const tx_isolation = thd_tx_isolation(ha_thd()); + ulong const tx_isolation = thd_tx_isolation(thd); - if (tx_isolation <= ISO_READ_COMMITTED) { + if (tx_isolation <= ISO_READ_COMMITTED && + !(tx_isolation == ISO_READ_COMMITTED && + thd_rpl_is_parallel(thd))) { return(int_table_flags); }