diff --git a/extra/mariabackup/backup_mysql.cc b/extra/mariabackup/backup_mysql.cc index 162980acb21..04a4fb22e14 100644 --- a/extra/mariabackup/backup_mysql.cc +++ b/extra/mariabackup/backup_mysql.cc @@ -930,7 +930,7 @@ bool lock_tables(MYSQL *connection) if (have_galera_enabled) { - xb_mysql_query(connection, "SET SESSION wsrep_causal_reads=0", false); + xb_mysql_query(connection, "SET SESSION wsrep_sync_wait=0", false); } xb_mysql_query(connection, "BACKUP STAGE START", true); diff --git a/mysql-test/suite/galera/r/MDEV-22051.result b/mysql-test/suite/galera/r/MDEV-22051.result index 9f5394637c2..0e9756dd20e 100644 --- a/mysql-test/suite/galera/r/MDEV-22051.result +++ b/mysql-test/suite/galera/r/MDEV-22051.result @@ -2,14 +2,14 @@ connection node_2; connection node_1; FLUSH TABLES WITH READ LOCK; CREATE TABLE t1 (a INT) ENGINE=InnoDB; -ERROR 08S01: Aborting TOI: Global Read-Lock (FTWRL) in place. +ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE. SET wsrep_OSU_method=RSU; CREATE TABLE t1 (a INT) ENGINE=InnoDB; -ERROR 08S01: Aborting TOI: Global Read-Lock (FTWRL) in place. +ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE. SET wsrep_OSU_method=TOI; connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; CREATE TABLE t1 (a INT) ENGINE=InnoDB; -ERROR 08S01: Aborting TOI: Global Read-Lock (FTWRL) in place. +ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE. connection node_1; UNLOCK TABLES; CREATE TABLE t1 (a INT) ENGINE=InnoDB; diff --git a/mysql-test/suite/galera/r/galera_backup_stage.result b/mysql-test/suite/galera/r/galera_backup_stage.result new file mode 100644 index 00000000000..6fb7d1643cd --- /dev/null +++ b/mysql-test/suite/galera/r/galera_backup_stage.result @@ -0,0 +1,78 @@ +connection node_2; +connection node_1; +connection node_1; +CREATE TABLE t1 (f1 varchar(10)) ENGINE=InnoDB; +BACKUP STAGE START; +BACKUP STAGE FLUSH; +BACKUP STAGE END; +BACKUP STAGE START; +BACKUP STAGE FLUSH; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +SET SESSION wsrep_sync_wait=0; +SET SESSION wsrep_retry_autocommit=0; +INSERT INTO t1 (f1) values ("node1_1"); +ALTER TABLE t1 ADD COLUMN (f2 int(10)); +connection node_2; +INSERT INTO t1 (f1) values ("node2_1"); +ALTER TABLE t1 ADD COLUMN (f3 int(10)); +connection node_1; +BACKUP STAGE BLOCK_DDL; +connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1c; +SET SESSION wsrep_sync_wait=0; +connection node_2; +INSERT INTO t1 (f1) values("node2_2"); +ALTER TABLE t1 ADD COLUMN (f5 int(10)); +connection node_1a; +ALTER TABLE t1 ADD COLUMN (f4 int(10)); +ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE. +INSERT INTO t1 (f1) values("node1a");; +connection node_1c; +connection node_1; +BACKUP STAGE BLOCK_COMMIT; +connection node_1c; +SELECT variable_value="Donor/Desynced" FROM information_schema.global_status WHERE variable_name="wsrep_local_state_comment"; +variable_value="Donor/Desynced" +1 +connection node_2; +INSERT INTO t1 (f1) values("node2_3"); +ALTER TABLE t1 ADD COLUMN (f6 int(10)); +connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1b; +SET SESSION wsrep_sync_wait=0; +SET SESSION wsrep_retry_autocommit=0; +ALTER TABLE t1 ADD COLUMN (f4 int(10)); +ERROR 08S01: Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE. +INSERT INTO t1 (f1) values("node1b");; +connection node_1c; +SELECT COUNT(*)=2 FROM t1; +COUNT(*)=2 +1 +SELECT COUNT(*)=3 FROM information_schema.columns WHERE table_name = 't1'; +COUNT(*)=3 +1 +connection node_1; +BACKUP STAGE END; +connection node_1a; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +connection node_1b; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +connection node_1; +SELECT COUNT(*)=4 FROM t1; +COUNT(*)=4 +1 +SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1'; +COUNT(*)=5 +1 +connection node_2; +SELECT COUNT(*)=4 FROM t1; +COUNT(*)=4 +1 +SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1'; +COUNT(*)=5 +1 +connection node_1; +DROP TABLE t1; +call mtr.add_suppression("WSREP: ALTER TABLE isolation failure"); +call mtr.add_suppression("greater than drain seqno"); diff --git a/mysql-test/suite/galera/t/galera_backup_stage.test b/mysql-test/suite/galera/t/galera_backup_stage.test new file mode 100644 index 00000000000..31d76816355 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_backup_stage.test @@ -0,0 +1,120 @@ +# +# Check that BACKUP STAGE BLOCK_DDL desyncs and pauses the node until BACKUP STAGE END: +# - Local DDLs will fail immediately +# - Local DMLs will block until resync +# - Remote txns will be applied after resync (STAGE END). +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_metadata_lock_info.inc + +--connection node_1 +CREATE TABLE t1 (f1 varchar(10)) ENGINE=InnoDB; + +# First, check that BACKUP STAGE END skipping desyncing stages is fine +BACKUP STAGE START; +BACKUP STAGE FLUSH; +BACKUP STAGE END; + +BACKUP STAGE START; +BACKUP STAGE FLUSH; + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +SET SESSION wsrep_sync_wait=0; +SET SESSION wsrep_retry_autocommit=0; +INSERT INTO t1 (f1) values ("node1_1"); +ALTER TABLE t1 ADD COLUMN (f2 int(10)); + +--connection node_2 +INSERT INTO t1 (f1) values ("node2_1"); +ALTER TABLE t1 ADD COLUMN (f3 int(10)); + +# BLOCK_DDL desyncs and pauses the node +--connection node_1 +BACKUP STAGE BLOCK_DDL; + +--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1c +SET SESSION wsrep_sync_wait=0; +--let $wait_condition = SELECT variable_value="Donor/Desynced" FROM information_schema.global_status WHERE variable_name="wsrep_local_state_comment" +--source include/wait_condition.inc + +--connection node_2 +INSERT INTO t1 (f1) values("node2_2"); +ALTER TABLE t1 ADD COLUMN (f5 int(10)); + +--connection node_1a +--error ER_UNKNOWN_COM_ERROR +ALTER TABLE t1 ADD COLUMN (f4 int(10)); +--let $insert_id = `SELECT CONNECTION_ID()` +--send INSERT INTO t1 (f1) values("node1a"); + +# the insert will block during commit inside the provider, in certify. We can't +# check for sure it is blocked there, so we wait for the thread to at least +# reach commit stage. In the unlikely case the interleaving is different, the +# result of the test should not change. +--connection node_1c +--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE State='Commit' AND ID=$insert_id +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info WHERE TABLE_NAME='t1' AND THREAD_ID=$insert_id +--source include/wait_condition.inc + +--connection node_1 +BACKUP STAGE BLOCK_COMMIT; + +# node only resumes/resyncs upon STAGE END +--connection node_1c +SELECT variable_value="Donor/Desynced" FROM information_schema.global_status WHERE variable_name="wsrep_local_state_comment"; + +--connection node_2 +INSERT INTO t1 (f1) values("node2_3"); +ALTER TABLE t1 ADD COLUMN (f6 int(10)); + +--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1b +SET SESSION wsrep_sync_wait=0; +SET SESSION wsrep_retry_autocommit=0; +--error ER_UNKNOWN_COM_ERROR +ALTER TABLE t1 ADD COLUMN (f4 int(10)); +--let $insert_id = `SELECT CONNECTION_ID()` +--send INSERT INTO t1 (f1) values("node1b"); + +# wait for insert to get blocked +--connection node_1c +--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE State='Commit' AND ID=$insert_id +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info WHERE TABLE_NAME='t1' AND THREAD_ID=$insert_id +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*)=2 FROM information_schema.processlist WHERE Info like 'INSERT INTO t1 (f1) values("node1%")' AND State = 'Commit' +--source include/wait_condition.inc + +# nothing after BLOCK_DDL is applied +SELECT COUNT(*)=2 FROM t1; +SELECT COUNT(*)=3 FROM information_schema.columns WHERE table_name = 't1'; + +# STAGE END resumes and resyncs the node +--connection node_1 +BACKUP STAGE END; + +# Upon resume, blocked inserts will continue but conflict with the applying alters +--connection node_1a +--error ER_LOCK_DEADLOCK +--reap +--connection node_1b +--error ER_LOCK_DEADLOCK +--reap + +--connection node_1 +SELECT COUNT(*)=4 FROM t1; +SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1'; + +--connection node_2 +SELECT COUNT(*)=4 FROM t1; +SELECT COUNT(*)=5 FROM information_schema.columns WHERE table_name = 't1'; + +--connection node_1 +DROP TABLE t1; +call mtr.add_suppression("WSREP: ALTER TABLE isolation failure"); +call mtr.add_suppression("greater than drain seqno"); diff --git a/sql/backup.cc b/sql/backup.cc index cff14415d96..c021d0fc552 100644 --- a/sql/backup.cc +++ b/sql/backup.cc @@ -34,6 +34,7 @@ #include "sql_insert.h" // kill_delayed_threads #include "sql_handler.h" // mysql_ha_cleanup_no_free #include +#include "wsrep_mysqld.h" static const char *stage_names[]= {"START", "FLUSH", "BLOCK_DDL", "BLOCK_COMMIT", "END", 0}; @@ -254,6 +255,21 @@ static bool backup_block_ddl(THD *thd) (void) flush_tables(thd, FLUSH_NON_TRANS_TABLES); thd->clear_error(); +#ifdef WITH_WSREP + /* + We desync the node for BACKUP STAGE because applier threads + bypass backup MDL locks (see MDL_lock::can_grant_lock) + */ + if (WSREP_NNULL(thd)) + { + Wsrep_server_state &server_state= Wsrep_server_state::instance(); + if (server_state.desync_and_pause().is_undefined()) { + DBUG_RETURN(1); + } + thd->wsrep_desynced_backup_stage= true; + } +#endif /* WITH_WSREP */ + /* block new DDL's, in addition to all previous blocks We didn't do this lock above, as we wanted DDL's to be executed while @@ -318,6 +334,14 @@ bool backup_end(THD *thd) ha_end_backup(); thd->current_backup_stage= BACKUP_FINISHED; thd->mdl_context.release_lock(backup_flush_ticket); +#ifdef WITH_WSREP + if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage) + { + Wsrep_server_state &server_state= Wsrep_server_state::instance(); + server_state.resume_and_resync(); + thd->wsrep_desynced_backup_stage= false; + } +#endif /* WITH_WSREP */ } DBUG_RETURN(0); } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 7e43605b047..8aaa0ebbe71 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1281,6 +1281,7 @@ void THD::init() m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID; wsrep_replicate_GTID = false; wsrep_aborter = 0; + wsrep_desynced_backup_stage= false; #endif /* WITH_WSREP */ if (variables.sql_log_bin) diff --git a/sql/sql_class.h b/sql/sql_class.h index a439da53a7d..8e2b6eed338 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3011,6 +3011,9 @@ public: uint server_status,open_options; enum enum_thread_type system_thread; enum backup_stages current_backup_stage; +#ifdef WITH_WSREP + bool wsrep_desynced_backup_stage; +#endif /* WITH_WSREP */ /* Current or next transaction isolation level. When a connection is established, the value is taken from diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 193c06725cf..cabf066abf4 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2168,7 +2168,7 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, if (Wsrep_server_state::instance().desynced_on_pause()) { my_message(ER_UNKNOWN_COM_ERROR, - "Aborting TOI: Global Read-Lock (FTWRL) in place.", MYF(0)); + "Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.", MYF(0)); return -1; }