From 41b435fea991dbb31e1b652d8b0173b87d45859d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 8 Feb 2024 16:47:00 +0200 Subject: [PATCH] MDEV-33211 : Galera SST on maria-backup causes donor node to be unresponsive If mariabackup with backup locks is used on SST we do not pause and desync galera provider at all. In the WSREP_MODE_BF_MARIABACKUP case the provider is paused and desynced at the BLOCK_COMMIT phase. In other cases the provider is paused and desynced at the BLOCK_DDL phase. --- extra/mariabackup/backup_mysql.cc | 95 ++++++++++++++++++- .../include/wait_until_connected_again.inc | 2 +- .../r/galera_bf_abort_mariabackup.result | 24 ++--- .../galera/t/galera_bf_abort_mariabackup.test | 15 +-- sql/backup.cc | 60 +++++++++--- 5 files changed, 163 insertions(+), 33 deletions(-) diff --git a/extra/mariabackup/backup_mysql.cc b/extra/mariabackup/backup_mysql.cc index 2d4b19c20a7..ab543b5ad0b 100644 --- a/extra/mariabackup/backup_mysql.cc +++ b/extra/mariabackup/backup_mysql.cc @@ -1417,12 +1417,103 @@ write_slave_info(ds_ctxt *datasink, MYSQL *connection) /*********************************************************************//** -Old function, not needed anymore with BACKUP LOCKS +Retrieves MySQL Galera info and saves it in a file. It also prints it to stdout. + +We should create xtrabackup_galera_info file even when backup locks +are used because donor's wsrep_gtid_domain_id is needed later in joiner. +Note that at this stage wsrep_local_state_uuid and wsrep_last_committed +are inconsistent but they are not used in joiner. Joiner will rewrite this file +at mariabackup --prepare phase and thus there is extra file donor_galera_info. +Information is needed to maintain wsrep_gtid_domain_id and gtid_binlog_pos +same across the cluster. If the joiner node has a different wsrep_gtid_domain_id +we should still receive effective domain id from the donor node, +and use it. 
*/ bool write_galera_info(ds_ctxt *datasink, MYSQL *connection) { - return true; // Success + char *state_uuid = NULL, *state_uuid55 = NULL; + char *last_committed = NULL, *last_committed55 = NULL; + char *domain_id = NULL, *domain_id55 = NULL; + bool result=true; + uint n_values=0; + char *wsrep_on = NULL, *wsrep_on55 = NULL; + + mysql_variable vars[] = { + {"Wsrep_on", &wsrep_on}, + {"wsrep_on", &wsrep_on55}, + {NULL, NULL} + }; + + mysql_variable status[] = { + {"Wsrep_local_state_uuid", &state_uuid}, + {"wsrep_local_state_uuid", &state_uuid55}, + {"Wsrep_last_committed", &last_committed}, + {"wsrep_last_committed", &last_committed55}, + {NULL, NULL} + }; + + mysql_variable value[] = { + {"Wsrep_gtid_domain_id", &domain_id}, + {"wsrep_gtid_domain_id", &domain_id55}, + {NULL, NULL} + }; + + n_values= read_mysql_variables(connection, "SHOW VARIABLES", vars, true); + + if (n_values == 0 || (wsrep_on == NULL && wsrep_on55 == NULL)) + { + msg("Server is not Galera node thus --galera-info does not " + "have any effect."); + result = true; + goto cleanup; + } + + read_mysql_variables(connection, "SHOW STATUS", status, true); + + if ((state_uuid == NULL && state_uuid55 == NULL) + || (last_committed == NULL && last_committed55 == NULL)) + { + msg("Warning: failed to get master wsrep state from SHOW STATUS."); + result = true; + goto cleanup; + } + + n_values= read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'", value, true); + + if (n_values == 0 || (domain_id == NULL && domain_id55 == NULL)) + { + msg("Warning: failed to get master wsrep state from SHOW VARIABLES."); + result = true; + goto cleanup; + } + + result= datasink->backup_file_printf(XTRABACKUP_GALERA_INFO, + "%s:%s %s\n", state_uuid ? state_uuid : state_uuid55, + last_committed ? last_committed : last_committed55, + domain_id ? domain_id : domain_id55); + + if (result) + { + result= datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO, + "%s:%s %s\n", state_uuid ? 
state_uuid : state_uuid55, + last_committed ? last_committed : last_committed55, + domain_id ? domain_id : domain_id55); + } + + if (result) + write_current_binlog_file(datasink, connection); + + if (result) + msg("Writing Galera info succeeded with %s:%s %s", + state_uuid ? state_uuid : state_uuid55, + last_committed ? last_committed : last_committed55, + domain_id ? domain_id : domain_id55); + +cleanup: + free_mysql_variables(status); + + return(result); } diff --git a/mysql-test/include/wait_until_connected_again.inc b/mysql-test/include/wait_until_connected_again.inc index deb6ca13e8b..2b20c780b69 100644 --- a/mysql-test/include/wait_until_connected_again.inc +++ b/mysql-test/include/wait_until_connected_again.inc @@ -11,7 +11,7 @@ let $counter= 5000; let $mysql_errno= 9999; while ($mysql_errno) { - --error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION + --error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION,2026 select 1; dec $counter; diff --git a/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result b/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result index 88c200ee933..fa0568035a6 100644 --- a/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result +++ b/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result @@ -12,9 +12,9 @@ connection node_1; connection node_2; Starting server ... connection node_1; -# Both should return FOUND 2 as we have bootstrap and SST -FOUND 2 /Desyncing and pausing the provider/ in mysqld.1.err -FOUND 2 /Resuming and resyncing the provider/ in mysqld.1.err +# Both should return NOT FOUND as we have mariabackup with backup locks +NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err +NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err connection node_1; SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP"; # Restart node_2, force SST. 
@@ -25,9 +25,9 @@ connection node_2; Starting server ... connection node_2; connection node_1; -# Both should return FOUND 3 as we have 1 new SST -FOUND 3 /Desyncing and pausing the provider/ in mysqld.1.err -FOUND 3 /Resuming and resyncing the provider/ in mysqld.1.err +# Both should return NOT FOUND as we have mariabackup with backup locks +NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err +NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err SET GLOBAL wsrep_mode = ""; DROP TABLE t; # Case 2: MariaBackup backup from node_2 @@ -46,11 +46,13 @@ SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP"; SELECT @@wsrep_mode; @@wsrep_mode BF_ABORT_MARIABACKUP -# Both should return FOUND 1 as node should not desync -FOUND 1 /Desyncing and pausing the provider/ in mysqld.2.err -FOUND 1 /Resuming and resyncing the provider/ in mysqld.2.err -# Should return FOUND 1 because only last backup does not desync -FOUND 1 /Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used./ in mysqld.2.err +# Both should return FOUND 2 because both backups do desync but on different points +FOUND 2 /Desyncing and pausing the provider/ in mysqld.2.err +FOUND 2 /Resuming and resyncing the provider/ in mysqld.2.err +# Should return FOUND 1 as server did not desync at BLOCK_DDL +FOUND 1 /Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used./ in mysqld.2.err +# Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT +FOUND 1 /Server desynched from group during BACKUP STAGE BLOCK_COMMIT./ in mysqld.2.err SET GLOBAL wsrep_mode = ""; connection node_1; DROP TABLE t; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test b/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test index 34c3f5d3621..ed16ac3926c 100644 --- a/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test +++ b/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test @@ -13,7 +13,7 @@ CREATE TABLE t(i INT NOT NULL PRIMARY KEY) 
ENGINE INNODB; INSERT INTO t VALUES(1); # -# In default settings donor should desync +# In default settings donor should not desync # --echo # Restart node_2, force SST. --connection node_2 @@ -37,7 +37,7 @@ let $restart_noprint=2; --connection node_1 let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; ---echo # Both should return FOUND 2 as we have bootstrap and SST +--echo # Both should return NOT FOUND as we have mariabackup with backup locks let SEARCH_PATTERN = Desyncing and pausing the provider; --source include/search_pattern_in_file.inc let SEARCH_PATTERN = Resuming and resyncing the provider; @@ -76,7 +76,7 @@ let $restart_noprint=2; --connection node_1 let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; ---echo # Both should return FOUND 3 as we have 1 new SST +--echo # Both should return NOT FOUND as we have mariabackup with backup locks let SEARCH_PATTERN = Desyncing and pausing the provider; --source include/search_pattern_in_file.inc let SEARCH_PATTERN = Resuming and resyncing the provider; @@ -117,13 +117,16 @@ let $targetdir=$MYSQLTEST_VARDIR/tmp/backup2; --enable_result_log let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.2.err; ---echo # Both should return FOUND 1 as node should not desync +--echo # Both should return FOUND 2 because both backups do desync but on different points let SEARCH_PATTERN = Desyncing and pausing the provider; --source include/search_pattern_in_file.inc let SEARCH_PATTERN = Resuming and resyncing the provider; --source include/search_pattern_in_file.inc ---echo # Should return FOUND 1 because only last backup does not desync -let SEARCH_PATTERN = Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.; +--echo # Should return FOUND 1 as server did not desync at BLOCK_DDL +let SEARCH_PATTERN = Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used.; +--source include/search_pattern_in_file.inc +--echo # Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT 
+let SEARCH_PATTERN = Server desynched from group during BACKUP STAGE BLOCK_COMMIT.; --source include/search_pattern_in_file.inc SET GLOBAL wsrep_mode = ""; diff --git a/sql/backup.cc b/sql/backup.cc index 5ce770c3c4c..f634a11f867 100644 --- a/sql/backup.cc +++ b/sql/backup.cc @@ -39,6 +39,7 @@ #ifdef WITH_WSREP #include "wsrep_server_state.h" #include "wsrep_mysqld.h" +#include "wsrep_sst.h" #endif /* WITH_WSREP */ static const char *stage_names[]= @@ -293,29 +294,40 @@ static bool backup_block_ddl(THD *thd) #ifdef WITH_WSREP DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false); - /* - if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock - holder, then do not desync and pause the node from cluster replication. - e.g. mariabackup uses BACKUP STATE BLOCK_DDL; and will be abortable by this. - But, If node is processing as SST donor or WSREP_MODE_BF_MARIABACKUP mode is not set, - we desync the node for BACKUP STAGE because applier threads - bypass backup MDL locks (see MDL_lock::can_grant_lock) - */ if (WSREP_NNULL(thd)) { Wsrep_server_state &server_state= Wsrep_server_state::instance(); - if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) || - server_state.state() == Wsrep_server_state::s_donor) + /* + If user is specifically choosing to allow BF aborting for + BACKUP STAGE BLOCK_DDL lock holder, then do not desync and + pause the node from cluster replication. e.g. mariabackup + uses BACKUP STATE BLOCK_DDL; and will be abortable by this. 
+ */ + bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor + && !strcmp(wsrep_sst_method, "mariabackup")); + bool allow_bf= wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP); + bool pause_and_desync= true; + + if ((allow_bf) || (mariabackup)) { - if (server_state.desync_and_pause().is_undefined()) { + pause_and_desync= false; + } + + if (pause_and_desync) + { + if (server_state.desync_and_pause().is_undefined()) DBUG_RETURN(1); - } + + WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_DDL."); DEBUG_SYNC(thd, "wsrep_backup_stage_after_desync_and_pause"); thd->wsrep_desynced_backup_stage= true; } else - WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used."); + { + WSREP_INFO("Server not desynched from group at BLOCK_DDL because %s is used.", + allow_bf ? "WSREP_MODE_BF_MARIABACKUP" : wsrep_sst_method); + } } #endif /* WITH_WSREP */ @@ -399,6 +411,28 @@ static bool backup_block_commit(THD *thd) } thd->clear_error(); +#ifdef WITH_WSREP + if (WSREP_NNULL(thd) && !thd->wsrep_desynced_backup_stage) + { + Wsrep_server_state &server_state= Wsrep_server_state::instance(); + bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor + && !strcmp(wsrep_sst_method, "mariabackup")); + + /* If this node is donor and mariabackup is not used + we desync and pause provider here if it is not yet done. + */ + if (!mariabackup) + { + if (server_state.desync_and_pause().is_undefined()) + DBUG_RETURN(1); + + WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_COMMIT."); + thd->wsrep_desynced_backup_stage= true; + DEBUG_SYNC(thd, "wsrep_backup_stage_commit_after_desync_and_pause"); + } + } +#endif /* WITH_WSREP */ + DBUG_RETURN(0); }