diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 8e8229d6630..ceb6ac93ff5 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -622,6 +622,7 @@ void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton); void thd_storage_lock_wait(MYSQL_THD thd, long long value); int thd_tx_isolation(const MYSQL_THD thd); int thd_tx_is_read_only(const MYSQL_THD thd); +int thd_rpl_is_parallel(const MYSQL_THD thd); /** Create a temporary file. diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp index dff4dac460d..98fd089570d 100644 --- a/include/mysql/plugin_audit.h.pp +++ b/include/mysql/plugin_audit.h.pp @@ -303,6 +303,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton); void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); int thd_tx_is_read_only(const void* thd); +int thd_rpl_is_parallel(const void* thd); int mysql_tmpfile(const char *prefix); unsigned long thd_get_thread_id(const void* thd); void thd_get_xid(const void* thd, MYSQL_XID *xid); diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp index b6a02fe8be9..6d52c5be7f0 100644 --- a/include/mysql/plugin_auth.h.pp +++ b/include/mysql/plugin_auth.h.pp @@ -303,6 +303,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton); void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); int thd_tx_is_read_only(const void* thd); +int thd_rpl_is_parallel(const void* thd); int mysql_tmpfile(const char *prefix); unsigned long thd_get_thread_id(const void* thd); void thd_get_xid(const void* thd, MYSQL_XID *xid); diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp index 4db34f1a380..cb3e7cafc97 100644 --- a/include/mysql/plugin_ftparser.h.pp +++ b/include/mysql/plugin_ftparser.h.pp @@ -256,6 +256,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton); void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); int thd_tx_is_read_only(const void* thd); +int thd_rpl_is_parallel(const void* thd); int mysql_tmpfile(const char *prefix); unsigned long thd_get_thread_id(const void* thd); void thd_get_xid(const void* thd, MYSQL_XID *xid); diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 3784ccb4dec..7308e13eef4 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -761,11 +761,51 @@ a b 110 1 111 2 112 3 +***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts *** +include/stop_slave.inc +CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +UPDATE t4 SET b=NULL WHERE a=6; +SET debug_sync='now WAIT_FOR master_queued1'; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +DELETE FROM t4 WHERE b <= 3; +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; +SET debug_sync='RESET'; +include/start_slave.inc +include/stop_slave.inc +SELECT * FROM t4 ORDER BY a; +a b +1 NULL +3 NULL +4 4 +5 NULL +6 NULL +DELETE FROM t4; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +INSERT INTO t4 VALUES (7, NULL); +SET debug_sync='now WAIT_FOR master_queued1'; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +DELETE FROM t4 WHERE b <= 3; +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; +SET debug_sync='RESET'; +include/start_slave.inc +SELECT * FROM t4 ORDER BY a; +a b +1 NULL +3 NULL +4 4 +5 NULL +6 6 +7 NULL include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; DROP function foo; -DROP TABLE t1,t2,t3; +DROP TABLE t1,t2,t3,t4; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 6bf339edb3a..2742a2089f8 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -1172,6 +1172,84 @@ START SLAVE SQL_THREAD; SELECT * FROM t3 WHERE a >= 110 ORDER BY a; +--echo ***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts *** +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); + +# Create a group commit with UPDATE and DELETE, in that order. +# The bug was that while the UPDATE's row lock does not block the DELETE, the +# DELETE's gap lock _does_ block the UPDATE. This could cause a deadlock +# on the slave. +--connection con1 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +send UPDATE t4 SET b=NULL WHERE a=6; +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued1'; + +--connection con2 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +send DELETE FROM t4 WHERE b <= 3; + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; + +--connection con1 +REAP; +--connection con2 +REAP; +SET debug_sync='RESET'; +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master +--source include/stop_slave.inc + +SELECT * FROM t4 ORDER BY a; + + +# Another example, this one with INSERT vs. DELETE +--connection server_1 +DELETE FROM t4; +INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); + +# Create a group commit with INSERT and DELETE, in that order. +# The bug was that while the INSERT's insert intention lock does not block +# the DELETE, the DELETE's gap lock _does_ block the INSERT. This could cause +# a deadlock on the slave. +--connection con1 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +send INSERT INTO t4 VALUES (7, NULL); +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued1'; + +--connection con2 +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +send DELETE FROM t4 WHERE b <= 3; + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='now SIGNAL master_cont1'; + +--connection con1 +REAP; +--connection con2 +REAP; +SET debug_sync='RESET'; +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master + +SELECT * FROM t4 ORDER BY a; + + --connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; @@ -1180,7 +1258,7 @@ SET DEBUG_SYNC= 'RESET'; --connection server_1 DROP function foo; -DROP TABLE t1,t2,t3; +DROP TABLE t1,t2,t3,t4; SET DEBUG_SYNC= 'RESET'; --source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index af797d55da3..5bc22d7304e 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -242,6 +242,39 @@ handle_rpl_parallel_thread(void *arg) thd_proc_info(thd, "Waiting for work from main SQL threads"); thd->set_time(); thd->variables.lock_wait_timeout= LONG_TIMEOUT; + /* + For now, we need to run the replication parallel worker threads in + READ COMMITTED. This is needed because gap locks are not symmetric. + For example, a gap lock from a DELETE blocks an insert intention lock, + but not vice versa. So an INSERT followed by DELETE can group commit + on the master, but if we are unlucky with thread scheduling we can + then deadlock on the slave because the INSERT ends up waiting for a + gap lock from the DELETE (and the DELETE in turn waits for the INSERT + in wait_for_prior_commit()). See also MDEV-5914. + + It should be mostly safe to run in READ COMMITTED in the slave anyway. + The commit order is already fixed from on the master, so we do not + risk logging into the binlog in an incorrect order between worker + threads (one that would cause different results if executed on a + lower-level slave that uses this slave as a master). The only + potential problem is with transactions run in a different master + connection (using multi-source replication), or run directly on the + slave by an application; when using READ COMMITTED we are not + guaranteed serialisability of binlogged statements. + + In practice, this is unlikely to be an issue. In GTID mode, such + parallel transactions from multi-source or application must in any + case use a different replication domain, in which case binlog order + by definition must be independent between the different domain. Even + in non-GTID mode, normally one will assume that the external + transactions are not conflicting with those applied by the slave, so + that isolation level should make no difference. It would be rather + strange if the result of applying query events from one master would + depend on the timing and nature of other queries executed from + different multi-source connections or done directly on the slave by + an application. Still, something to be aware of. + */ + thd->variables.tx_isolation= ISO_READ_COMMITTED; mysql_mutex_lock(&rpt->LOCK_rpl_thread); rpt->thd= thd; @@ -306,6 +339,7 @@ handle_rpl_parallel_thread(void *arg) PSI_stage_info old_stage; uint64 wait_count; + thd->tx_isolation= (enum_tx_isolation)thd->variables.tx_isolation; in_event_group= true; /* If the standalone flag is set, then this event group consists of a diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8a4a7006c63..e85a7d74851 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -4234,6 +4234,12 @@ extern "C" int thd_slave_thread(const MYSQL_THD thd) return(thd->slave_thread); } +/* Returns true for a worker thread in parallel replication. */ +extern "C" int thd_rpl_is_parallel(const MYSQL_THD thd) +{ + return thd->rgi_slave && thd->rgi_slave->is_parallel_exec; +} + extern "C" int thd_non_transactional_update(const MYSQL_THD thd) { return(thd->transaction.all.modified_non_trans_table); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index fb481a17d55..3557c5066e8 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4223,11 +4223,14 @@ handler::Table_flags ha_innobase::table_flags() const /*============================*/ { + THD *thd = ha_thd(); /* Need to use tx_isolation here since table flags is (also) called before prebuilt is inited. */ - ulong const tx_isolation = thd_tx_isolation(ha_thd()); + ulong const tx_isolation = thd_tx_isolation(thd); - if (tx_isolation <= ISO_READ_COMMITTED) { + if (tx_isolation <= ISO_READ_COMMITTED && + !(tx_isolation == ISO_READ_COMMITTED && + thd_rpl_is_parallel(thd))) { return(int_table_flags); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index d26c315d2eb..a40463d3bd8 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -4680,11 +4680,14 @@ handler::Table_flags ha_innobase::table_flags() const /*============================*/ { + THD *thd = ha_thd(); /* Need to use tx_isolation here since table flags is (also) called before prebuilt is inited. */ - ulong const tx_isolation = thd_tx_isolation(ha_thd()); + ulong const tx_isolation = thd_tx_isolation(thd); - if (tx_isolation <= ISO_READ_COMMITTED) { + if (tx_isolation <= ISO_READ_COMMITTED && + !(tx_isolation == ISO_READ_COMMITTED && + thd_rpl_is_parallel(thd))) { return(int_table_flags); }