mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-35465 Async replication stops working on Galera async replica node when parallel replication is enabled
The parallel slave failed to retry in retry_event_group() with the error "WSREP: Parallel slave worker failed at wsrep_before_command() hook". Fix the wsrep transaction cleanup/restart in retry_event_group() to properly clean up the previous transaction by calling wsrep_after_statement(). Also move the call that resets the error to after the call to wsrep_after_statement(), to make sure that the reset remains effective. Add an MTR test, galera_as_slave_parallel_retry, to reproduce the error when the fix is not present. Other issues which were detected when testing with sysbench: (1) Check whether the parallel slave has been killed for retry before waiting for prior commits in THD::wsrep_parallel_slave_wait_for_prior_commit(). This is required with slave-parallel-mode=optimistic to avoid a deadlock when a slave later in commit order manages to reach the prepare phase before a lock conflict is detected. (2) Suppress the wsrep-applier-specific warning for slave threads. Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
committed by
Julius Goryavsky
parent
c772344510
commit
a2575a0703
@ -134,7 +134,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev)
|
||||
asynchronously, we need to be sure they will be completed before starting a
|
||||
new transaction. Otherwise the new transaction might suffer a spurious kill.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
wait_for_pending_deadlock_kill(THD *thd, rpl_group_info *rgi)
|
||||
{
|
||||
PSI_stage_info old_stage;
|
||||
@ -831,8 +831,12 @@ do_retry:
|
||||
err= 0;
|
||||
errmsg= NULL;
|
||||
#ifdef WITH_WSREP
|
||||
thd->wsrep_cs().reset_error();
|
||||
WSREP_DEBUG("retrying async replication event");
|
||||
DBUG_EXECUTE_IF("sync.wsrep_retry_event_group", {
|
||||
const char act[]= "now "
|
||||
"SIGNAL sync.wsrep_retry_event_group_reached "
|
||||
"WAIT_FOR signal.wsrep_retry_event_group";
|
||||
debug_sync_set_action(thd, STRING_WITH_LEN(act));
|
||||
};);
|
||||
#endif /* WITH_WSREP */
|
||||
|
||||
/*
|
||||
@ -981,15 +985,20 @@ do_retry:
|
||||
*/
|
||||
thd->reset_killed();
|
||||
#ifdef WITH_WSREP
|
||||
if (wsrep_before_command(thd))
|
||||
if (WSREP(thd))
|
||||
{
|
||||
WSREP_WARN("Parallel slave worker failed at wsrep_before_command() hook");
|
||||
err= 1;
|
||||
goto err;
|
||||
/* Exec after statement hook to make sure that the failed transaction
|
||||
* gets cleared and reset error state. */
|
||||
if (wsrep_after_statement(thd))
|
||||
{
|
||||
WSREP_WARN("Parallel slave worker failed at wsrep_after_statement() hook");
|
||||
err= 1;
|
||||
goto err;
|
||||
}
|
||||
thd->wsrep_cs().reset_error();
|
||||
wsrep_start_trx_if_not_started(thd);
|
||||
WSREP_DEBUG("parallel slave retry, after trx start");
|
||||
}
|
||||
wsrep_start_trx_if_not_started(thd);
|
||||
WSREP_DEBUG("parallel slave retry, after trx start");
|
||||
|
||||
#endif /* WITH_WSREP */
|
||||
strmake_buf(log_name, ir->name);
|
||||
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
||||
|
Reference in New Issue
Block a user