From 7dde504aef06fee8aa335045cdb478062cd2c2a0 Mon Sep 17 00:00:00 2001 From: Andrei Date: Wed, 5 Jul 2023 19:01:13 +0300 Subject: [PATCH] q# This is a combination of 2 commits. MDEV-31503 ALTER SEQUENCE ends up in optimistic parallel slave binlog out-of-order The OOO error still was possible even after MDEV-31077. This time it occured through open_table() when the sequence table was not in the table cache *and* the table was created before the last server restart. In such context a internal (read-only) transaction is committed and it was not blocked from doing a wakeup() call to subsequent transactions. Fixed with extending suspend_subsequent_commits() effect for the entirety of Sql_cmd_alter_sequence::execute(). An elaborated MDEV-31077 test proves the fixes of both failure scenarios. Also the bug condition suggests a workaround to pre-SELECT sequence tables before START SLAVE. Reviewed-by: Brandon Nesterenko --- .../suite/rpl/r/rpl_parallel_seq.result | 13 +++++- mysql-test/suite/rpl/t/rpl_parallel_seq.test | 21 +++++++++- sql/sql_sequence.cc | 41 ++++++++++++------- 3 files changed, 58 insertions(+), 17 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel_seq.result b/mysql-test/suite/rpl/r/rpl_parallel_seq.result index 60061049ed4..f5b062236db 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_seq.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_seq.result @@ -47,11 +47,21 @@ Warnings: Note 1255 Slave already has been stopped RESET MASTER; SET @@global.gtid_slave_pos=""; -SET @@global.gtid_strict_mode=1; connection master; RESET MASTER; CREATE TABLE ti (a INT) ENGINE=innodb; CREATE SEQUENCE s2 ENGINE=innodb; +include/save_master_gtid.inc +connection slave; +include/start_slave.inc +include/sync_with_master_gtid.inc +include/stop_slave.inc +include/rpl_restart_server.inc [server_number=2] +SET @@global.slave_parallel_threads=2; +SET @@global.slave_parallel_mode=optimistic; +SET @@global.debug_dbug="+d,hold_worker_on_schedule"; +SET @@global.gtid_strict_mode=1; +connection master; SET @@gtid_seq_no=100; ALTER SEQUENCE s2 restart with 1; INSERT INTO ti SET a=1; @@ -60,6 +70,7 @@ SELECT @@global.gtid_binlog_state "Master gtid state"; Master gtid state 0-1-101 connection slave; +SET STATEMENT sql_log_bin=0 FOR FLUSH TABLES; include/start_slave.inc SELECT @@global.gtid_binlog_state, @@global.gtid_slave_pos as "no 100,101 yet in both"; @@global.gtid_binlog_state no 100,101 yet in both diff --git a/mysql-test/suite/rpl/t/rpl_parallel_seq.test b/mysql-test/suite/rpl/t/rpl_parallel_seq.test index 741859bc588..e975d079e52 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_seq.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_seq.test @@ -77,15 +77,28 @@ SET DEBUG_SYNC = 'now SIGNAL continue_worker'; --source include/stop_slave.inc RESET MASTER; SET @@global.gtid_slave_pos=""; ---let $slave_gtid_strict_mode=`select @@global.gtid_strict_mode` -SET @@global.gtid_strict_mode=1; --connection master RESET MASTER; # Load from master CREATE TABLE ti (a INT) ENGINE=innodb; CREATE SEQUENCE s2 ENGINE=innodb; +--source include/save_master_gtid.inc +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $rpl_server_number= 2 +--source include/rpl_restart_server.inc +# upon restart +SET @@global.slave_parallel_threads=2; +SET @@global.slave_parallel_mode=optimistic; +SET @@global.debug_dbug="+d,hold_worker_on_schedule"; +--let $slave_gtid_strict_mode=`select @@global.gtid_strict_mode` +SET @@global.gtid_strict_mode=1; + +--connection master SET @@gtid_seq_no=100; ALTER SEQUENCE s2 restart with 1; INSERT INTO ti SET a=1; @@ -93,6 +106,10 @@ INSERT INTO ti SET a=1; SELECT @@global.gtid_binlog_state "Master gtid state"; --connection slave +# The following FT complicates the opening table time with committing +# an internal transaction. The rest of the test also proves +# MDEV-31503 "branch" of the OOO error is fixed. +SET STATEMENT sql_log_bin=0 FOR FLUSH TABLES; --source include/start_slave.inc --let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit" diff --git a/sql/sql_sequence.cc b/sql/sql_sequence.cc index 2904749d1e6..4ba1e6092bd 100644 --- a/sql/sql_sequence.cc +++ b/sql/sql_sequence.cc @@ -900,6 +900,20 @@ end: DBUG_RETURN(error); } +#if defined(HAVE_REPLICATION) +class wait_for_commit_raii +{ +private: + THD *m_thd; + wait_for_commit *m_wfc; + +public: + wait_for_commit_raii(THD* thd) : + m_thd(thd), m_wfc(thd->suspend_subsequent_commits()) + {} + ~wait_for_commit_raii() { m_thd->resume_subsequent_commits(m_wfc); } +}; +#endif bool Sql_cmd_alter_sequence::execute(THD *thd) { @@ -912,7 +926,10 @@ bool Sql_cmd_alter_sequence::execute(THD *thd) SEQUENCE *seq; No_such_table_error_handler no_such_table_handler; DBUG_ENTER("Sql_cmd_alter_sequence::execute"); - +#if defined(HAVE_REPLICATION) + /* No wakeup():s of subsequent commits is allowed in this function. */ + wait_for_commit_raii suspend_wfc(thd); +#endif if (check_access(thd, ALTER_ACL, first_table->db.str, &first_table->grant.privilege, @@ -1010,19 +1027,15 @@ bool Sql_cmd_alter_sequence::execute(THD *thd) else table->file->print_error(error, MYF(0)); table->s->sequence->write_unlock(table); - { - wait_for_commit* suspended_wfc= thd->suspend_subsequent_commits(); - if (trans_commit_stmt(thd)) - error= 1; - if (trans_commit_implicit(thd)) - error= 1; - thd->resume_subsequent_commits(suspended_wfc); - DBUG_EXECUTE_IF("hold_worker_on_schedule", - { - /* delay binlogging of a parent trx in rpl_parallel_seq */ - my_sleep(100000); - }); - } + if (trans_commit_stmt(thd)) + error= 1; + if (trans_commit_implicit(thd)) + error= 1; + DBUG_EXECUTE_IF("hold_worker_on_schedule", + { + /* delay binlogging of a parent trx in rpl_parallel_seq */ + my_sleep(100000); + }); if (likely(!error)) error= write_bin_log(thd, 1, thd->query(), thd->query_length()); if (likely(!error))