mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-33211 : Galera SST on maria-backup causes donor node to be unresponsive
If mariabackup with backup locks is used for SST, we do not pause and desync the Galera provider at all. In the WSREP_MODE_BF_MARIABACKUP case, the provider is paused and desynced at the BLOCK_COMMIT phase. In all other cases, the provider is paused and desynced at the BLOCK_DDL phase.
This commit is contained in:
@ -1417,12 +1417,103 @@ write_slave_info(ds_ctxt *datasink, MYSQL *connection)
|
||||
|
||||
|
||||
/*********************************************************************//**
|
||||
Old function, not needed anymore with BACKUP LOCKS
|
||||
Retrieves Galera state information from the server and saves it in a file. It also prints it to stdout.
|
||||
|
||||
We should create xtrabackup_galera_info file even when backup locks
|
||||
are used because donor's wsrep_gtid_domain_id is needed later in joiner.
|
||||
Note that at this stage wsrep_local_state_uuid and wsrep_last_committed
|
||||
are inconsistent but they are not used in joiner. Joiner will rewrite this file
|
||||
at mariabackup --prepare phase and thus there is extra file donor_galera_info.
|
||||
Information is needed to maintain wsrep_gtid_domain_id and gtid_binlog_pos
|
||||
same across the cluster. If joiner node has a different wsrep_gtid_domain_id
|
||||
we should still receive effective domain id from the donor node,
|
||||
and use it.
|
||||
*/
|
||||
bool
|
||||
write_galera_info(ds_ctxt *datasink, MYSQL *connection)
|
||||
{
|
||||
return true; // Success
|
||||
char *state_uuid = NULL, *state_uuid55 = NULL;
|
||||
char *last_committed = NULL, *last_committed55 = NULL;
|
||||
char *domain_id = NULL, *domain_id55 = NULL;
|
||||
bool result=true;
|
||||
uint n_values=0;
|
||||
char *wsrep_on = NULL, *wsrep_on55 = NULL;
|
||||
|
||||
mysql_variable vars[] = {
|
||||
{"Wsrep_on", &wsrep_on},
|
||||
{"wsrep_on", &wsrep_on55},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
mysql_variable status[] = {
|
||||
{"Wsrep_local_state_uuid", &state_uuid},
|
||||
{"wsrep_local_state_uuid", &state_uuid55},
|
||||
{"Wsrep_last_committed", &last_committed},
|
||||
{"wsrep_last_committed", &last_committed55},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
mysql_variable value[] = {
|
||||
{"Wsrep_gtid_domain_id", &domain_id},
|
||||
{"wsrep_gtid_domain_id", &domain_id55},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
n_values= read_mysql_variables(connection, "SHOW VARIABLES", vars, true);
|
||||
|
||||
if (n_values == 0 || (wsrep_on == NULL && wsrep_on55 == NULL))
|
||||
{
|
||||
msg("Server is not Galera node thus --galera-info does not "
|
||||
"have any effect.");
|
||||
result = true;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
read_mysql_variables(connection, "SHOW STATUS", status, true);
|
||||
|
||||
if ((state_uuid == NULL && state_uuid55 == NULL)
|
||||
|| (last_committed == NULL && last_committed55 == NULL))
|
||||
{
|
||||
msg("Warning: failed to get master wsrep state from SHOW STATUS.");
|
||||
result = true;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
n_values= read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'", value, true);
|
||||
|
||||
if (n_values == 0 || (domain_id == NULL && domain_id55 == NULL))
|
||||
{
|
||||
msg("Warning: failed to get master wsrep state from SHOW VARIABLES.");
|
||||
result = true;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
result= datasink->backup_file_printf(XTRABACKUP_GALERA_INFO,
|
||||
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
|
||||
last_committed ? last_committed : last_committed55,
|
||||
domain_id ? domain_id : domain_id55);
|
||||
|
||||
if (result)
|
||||
{
|
||||
result= datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO,
|
||||
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
|
||||
last_committed ? last_committed : last_committed55,
|
||||
domain_id ? domain_id : domain_id55);
|
||||
}
|
||||
|
||||
if (result)
|
||||
write_current_binlog_file(datasink, connection);
|
||||
|
||||
if (result)
|
||||
msg("Writing Galera info succeeded with %s:%s %s",
|
||||
state_uuid ? state_uuid : state_uuid55,
|
||||
last_committed ? last_committed : last_committed55,
|
||||
domain_id ? domain_id : domain_id55);
|
||||
|
||||
cleanup:
|
||||
free_mysql_variables(status);
|
||||
|
||||
return(result);
|
||||
}
|
||||
|
||||
|
||||
|
@ -11,7 +11,7 @@ let $counter= 5000;
|
||||
let $mysql_errno= 9999;
|
||||
while ($mysql_errno)
|
||||
{
|
||||
--error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION
|
||||
--error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION,2026
|
||||
select 1;
|
||||
|
||||
dec $counter;
|
||||
|
@ -12,9 +12,9 @@ connection node_1;
|
||||
connection node_2;
|
||||
Starting server ...
|
||||
connection node_1;
|
||||
# Both should return FOUND 2 as we have bootstrap and SST
|
||||
FOUND 2 /Desyncing and pausing the provider/ in mysqld.1.err
|
||||
FOUND 2 /Resuming and resyncing the provider/ in mysqld.1.err
|
||||
# Both should return NOT FOUND as we have mariabackup with backup locks
|
||||
NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err
|
||||
NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err
|
||||
connection node_1;
|
||||
SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP";
|
||||
# Restart node_2, force SST.
|
||||
@ -25,9 +25,9 @@ connection node_2;
|
||||
Starting server ...
|
||||
connection node_2;
|
||||
connection node_1;
|
||||
# Both should return FOUND 3 as we have 1 new SST
|
||||
FOUND 3 /Desyncing and pausing the provider/ in mysqld.1.err
|
||||
FOUND 3 /Resuming and resyncing the provider/ in mysqld.1.err
|
||||
# Both should return NOT FOUND as we have mariabackup with backup locks
|
||||
NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err
|
||||
NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err
|
||||
SET GLOBAL wsrep_mode = "";
|
||||
DROP TABLE t;
|
||||
# Case 2: MariaBackup backup from node_2
|
||||
@ -46,11 +46,13 @@ SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP";
|
||||
SELECT @@wsrep_mode;
|
||||
@@wsrep_mode
|
||||
BF_ABORT_MARIABACKUP
|
||||
# Both should return FOUND 1 as node should not desync
|
||||
FOUND 1 /Desyncing and pausing the provider/ in mysqld.2.err
|
||||
FOUND 1 /Resuming and resyncing the provider/ in mysqld.2.err
|
||||
# Should return FOUND 1 because only last backup does not desync
|
||||
FOUND 1 /Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used./ in mysqld.2.err
|
||||
# Both should return FOUND 2 because both backups do desync but on different points
|
||||
FOUND 2 /Desyncing and pausing the provider/ in mysqld.2.err
|
||||
FOUND 2 /Resuming and resyncing the provider/ in mysqld.2.err
|
||||
# Should return FOUND 1 as server did not desync at BLOCK_DDL
|
||||
FOUND 1 /Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used./ in mysqld.2.err
|
||||
# Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT
|
||||
FOUND 1 /Server desynched from group during BACKUP STAGE BLOCK_COMMIT./ in mysqld.2.err
|
||||
SET GLOBAL wsrep_mode = "";
|
||||
connection node_1;
|
||||
DROP TABLE t;
|
||||
|
@ -13,7 +13,7 @@
|
||||
CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB;
|
||||
INSERT INTO t VALUES(1);
|
||||
#
|
||||
# In default settings donor should desync
|
||||
# In default settings donor should not desync
|
||||
#
|
||||
--echo # Restart node_2, force SST.
|
||||
--connection node_2
|
||||
@ -37,7 +37,7 @@ let $restart_noprint=2;
|
||||
|
||||
--connection node_1
|
||||
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
|
||||
--echo # Both should return FOUND 2 as we have bootstrap and SST
|
||||
--echo # Both should return NOT FOUND as we have mariabackup with backup locks
|
||||
let SEARCH_PATTERN = Desyncing and pausing the provider;
|
||||
--source include/search_pattern_in_file.inc
|
||||
let SEARCH_PATTERN = Resuming and resyncing the provider;
|
||||
@ -76,7 +76,7 @@ let $restart_noprint=2;
|
||||
|
||||
--connection node_1
|
||||
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
|
||||
--echo # Both should return FOUND 3 as we have 1 new SST
|
||||
--echo # Both should return NOT FOUND as we have mariabackup with backup locks
|
||||
let SEARCH_PATTERN = Desyncing and pausing the provider;
|
||||
--source include/search_pattern_in_file.inc
|
||||
let SEARCH_PATTERN = Resuming and resyncing the provider;
|
||||
@ -117,13 +117,16 @@ let $targetdir=$MYSQLTEST_VARDIR/tmp/backup2;
|
||||
--enable_result_log
|
||||
|
||||
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.2.err;
|
||||
--echo # Both should return FOUND 1 as node should not desync
|
||||
--echo # Both should return FOUND 2 because both backups do desync but on different points
|
||||
let SEARCH_PATTERN = Desyncing and pausing the provider;
|
||||
--source include/search_pattern_in_file.inc
|
||||
let SEARCH_PATTERN = Resuming and resyncing the provider;
|
||||
--source include/search_pattern_in_file.inc
|
||||
--echo # Should return FOUND 1 because only last backup does not desync
|
||||
let SEARCH_PATTERN = Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.;
|
||||
--echo # Should return FOUND 1 as server did not desync at BLOCK_DDL
|
||||
let SEARCH_PATTERN = Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used.;
|
||||
--source include/search_pattern_in_file.inc
|
||||
--echo # Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT
|
||||
let SEARCH_PATTERN = Server desynched from group during BACKUP STAGE BLOCK_COMMIT.;
|
||||
--source include/search_pattern_in_file.inc
|
||||
|
||||
SET GLOBAL wsrep_mode = "";
|
||||
|
@ -39,6 +39,7 @@
|
||||
#ifdef WITH_WSREP
|
||||
#include "wsrep_server_state.h"
|
||||
#include "wsrep_mysqld.h"
|
||||
#include "wsrep_sst.h"
|
||||
#endif /* WITH_WSREP */
|
||||
|
||||
static const char *stage_names[]=
|
||||
@ -293,29 +294,40 @@ static bool backup_block_ddl(THD *thd)
|
||||
|
||||
#ifdef WITH_WSREP
|
||||
DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false);
|
||||
/*
|
||||
if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock
|
||||
holder, then do not desync and pause the node from cluster replication.
|
||||
e.g. mariabackup uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
|
||||
But, If node is processing as SST donor or WSREP_MODE_BF_MARIABACKUP mode is not set,
|
||||
we desync the node for BACKUP STAGE because applier threads
|
||||
bypass backup MDL locks (see MDL_lock::can_grant_lock)
|
||||
*/
|
||||
if (WSREP_NNULL(thd))
|
||||
{
|
||||
Wsrep_server_state &server_state= Wsrep_server_state::instance();
|
||||
|
||||
if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) ||
|
||||
server_state.state() == Wsrep_server_state::s_donor)
|
||||
/*
|
||||
If user is specifically choosing to allow BF aborting for
|
||||
BACKUP STAGE BLOCK_DDL lock holder, then do not desync and
|
||||
pause the node from cluster replication. e.g. mariabackup
|
||||
uses BACKUP STAGE BLOCK_DDL; and will be abortable by this.
|
||||
*/
|
||||
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
|
||||
&& !strcmp(wsrep_sst_method, "mariabackup"));
|
||||
bool allow_bf= wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP);
|
||||
bool pause_and_desync= true;
|
||||
|
||||
if ((allow_bf) || (mariabackup))
|
||||
{
|
||||
if (server_state.desync_and_pause().is_undefined()) {
|
||||
DBUG_RETURN(1);
|
||||
pause_and_desync= false;
|
||||
}
|
||||
|
||||
if (pause_and_desync)
|
||||
{
|
||||
if (server_state.desync_and_pause().is_undefined())
|
||||
DBUG_RETURN(1);
|
||||
|
||||
WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_DDL.");
|
||||
DEBUG_SYNC(thd, "wsrep_backup_stage_after_desync_and_pause");
|
||||
thd->wsrep_desynced_backup_stage= true;
|
||||
}
|
||||
else
|
||||
WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.");
|
||||
{
|
||||
WSREP_INFO("Server not desynched from group at BLOCK_DDL because %s is used.",
|
||||
allow_bf ? "WSREP_MODE_BF_MARIABACKUP" : wsrep_sst_method);
|
||||
}
|
||||
}
|
||||
#endif /* WITH_WSREP */
|
||||
|
||||
@ -399,6 +411,28 @@ static bool backup_block_commit(THD *thd)
|
||||
}
|
||||
thd->clear_error();
|
||||
|
||||
#ifdef WITH_WSREP
|
||||
if (WSREP_NNULL(thd) && !thd->wsrep_desynced_backup_stage)
|
||||
{
|
||||
Wsrep_server_state &server_state= Wsrep_server_state::instance();
|
||||
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
|
||||
&& !strcmp(wsrep_sst_method, "mariabackup"));
|
||||
|
||||
/* Unless this node is an SST donor using mariabackup,
|
||||
we desync and pause the provider here if it is not yet done.
|
||||
*/
|
||||
if (!mariabackup)
|
||||
{
|
||||
if (server_state.desync_and_pause().is_undefined())
|
||||
DBUG_RETURN(1);
|
||||
|
||||
WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_COMMIT.");
|
||||
thd->wsrep_desynced_backup_stage= true;
|
||||
DEBUG_SYNC(thd, "wsrep_backup_stage_commit_after_desync_and_pause");
|
||||
}
|
||||
}
|
||||
#endif /* WITH_WSREP */
|
||||
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user