1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-33211 : Galera SST on maria-backup causes donor node to be unresponsive

If mariabackup with backup locks is used on SST we do not
pause and desync galera provider at all. If WSREP_MODE_BF_MARIABACKUP
case provider is paused and desync at BLOCK_COMMIT phase. In
other cases provider is paused and desync at BLOCK_DDL phase.
This commit is contained in:
Jan Lindström
2024-02-08 16:47:00 +02:00
committed by Monty
parent 5d4adeabe2
commit 41b435fea9
5 changed files with 163 additions and 33 deletions

View File

@@ -39,6 +39,7 @@
#ifdef WITH_WSREP
#include "wsrep_server_state.h"
#include "wsrep_mysqld.h"
#include "wsrep_sst.h"
#endif /* WITH_WSREP */
static const char *stage_names[]=
@@ -293,29 +294,40 @@ static bool backup_block_ddl(THD *thd)
#ifdef WITH_WSREP
DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false);
/*
if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock
holder, then do not desync and pause the node from cluster replication.
e.g. mariabackup uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
But, If node is processing as SST donor or WSREP_MODE_BF_MARIABACKUP mode is not set,
we desync the node for BACKUP STAGE because applier threads
bypass backup MDL locks (see MDL_lock::can_grant_lock)
*/
if (WSREP_NNULL(thd))
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) ||
server_state.state() == Wsrep_server_state::s_donor)
/*
If user is specifically choosing to allow BF aborting for
BACKUP STAGE BLOCK_DDL lock holder, then do not desync and
pause the node from cluster replication. e.g. mariabackup
uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
*/
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
&& !strcmp(wsrep_sst_method, "mariabackup"));
bool allow_bf= wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP);
bool pause_and_desync= true;
if ((allow_bf) || (mariabackup))
{
if (server_state.desync_and_pause().is_undefined()) {
pause_and_desync= false;
}
if (pause_and_desync)
{
if (server_state.desync_and_pause().is_undefined())
DBUG_RETURN(1);
}
WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_DDL.");
DEBUG_SYNC(thd, "wsrep_backup_stage_after_desync_and_pause");
thd->wsrep_desynced_backup_stage= true;
}
else
WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.");
{
WSREP_INFO("Server not desynched from group at BLOCK_DDL because %s is used.",
allow_bf ? "WSREP_MODE_BF_MARIABACKUP" : wsrep_sst_method);
}
}
#endif /* WITH_WSREP */
@@ -399,6 +411,28 @@ static bool backup_block_commit(THD *thd)
}
thd->clear_error();
#ifdef WITH_WSREP
if (WSREP_NNULL(thd) && !thd->wsrep_desynced_backup_stage)
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
&& !strcmp(wsrep_sst_method, "mariabackup"));
/* If this node is donor and mariabackup is not used
we desync and pause provider here if it is not yet done.
*/
if (!mariabackup)
{
if (server_state.desync_and_pause().is_undefined())
DBUG_RETURN(1);
WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_COMMIT.");
thd->wsrep_desynced_backup_stage= true;
DEBUG_SYNC(thd, "wsrep_backup_stage_commit_after_desync_and_pause");
}
}
#endif /* WITH_WSREP */
DBUG_RETURN(0);
}