1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing

out of order at retry

The test failures were of two sorts. One is that the number of retries
what the slave thought as a temporary error exceeded
the default value of the slave retry option.
The 2nd issue was an out of order commit by transactions that
were supposed to error out instead.
Both issues are caused by the same reason that the post-temporary-error
retry did not check possibly already existing error status.

This is mended with refining conditions to retry. Specifically, a retrying
worker checks `rpl_parallel_entry::stop_on_error_sub_id` that
a potential failing predecessor could set to its own sub id.
Now should the member be set the retrying follower errors out with
ER_PRIOR_COMMIT_FAILED.
This commit is contained in:
Andrei Elkin
2018-02-09 15:00:23 +02:00
parent 76ae6e725d
commit 30019a48bf
3 changed files with 194 additions and 3 deletions

View File

@ -229,6 +229,14 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
entry->stop_on_error_sub_id= sub_id;
mysql_mutex_unlock(&entry->LOCK_parallel_entry);
DBUG_EXECUTE_IF("rpl_parallel_simulate_wait_at_retry", {
if (rgi->current_gtid.seq_no == 1000) {
DBUG_ASSERT(entry->stop_on_error_sub_id == sub_id);
debug_sync_set_action(thd,
STRING_WITH_LEN("now WAIT_FOR proceed_by_1000"));
}
});
if (rgi->killed_for_retry == rpl_group_info::RETRY_KILL_PENDING)
wait_for_pending_deadlock_kill(thd, rgi);
thd->clear_error();
@ -716,12 +724,20 @@ do_retry:
unregistering (and later re-registering) the wait.
*/
if(thd->wait_for_commit_ptr)
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
DBUG_EXECUTE_IF("inject_mdev8031", {
/* Simulate that we get deadlock killed at this exact point. */
rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
thd->set_killed(KILL_CONNECTION);
});
DBUG_EXECUTE_IF("rpl_parallel_simulate_wait_at_retry", {
if (rgi->current_gtid.seq_no == 1001) {
debug_sync_set_action(thd,
STRING_WITH_LEN("rpl_parallel_simulate_wait_at_retry WAIT_FOR proceed_by_1001"));
}
DEBUG_SYNC(thd, "rpl_parallel_simulate_wait_at_retry");
});
rgi->cleanup_context(thd, 1);
wait_for_pending_deadlock_kill(thd, rgi);
thd->reset_killed();
@ -745,7 +761,26 @@ do_retry:
for (;;)
{
mysql_mutex_lock(&entry->LOCK_parallel_entry);
register_wait_for_prior_event_group_commit(rgi, entry);
if (entry->stop_on_error_sub_id == (uint64) ULONGLONG_MAX ||
#ifndef DBUG_OFF
(DBUG_EVALUATE_IF("simulate_mdev_12746", 1, 0)) ||
#endif
rgi->gtid_sub_id < entry->stop_on_error_sub_id)
{
register_wait_for_prior_event_group_commit(rgi, entry);
}
else
{
/*
A failure of a preceeding "parent" transaction may not be
seen by the current one through its own worker_error.
Such induced error gets set by ourselves now.
*/
err= rgi->worker_error= 1;
my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
mysql_mutex_unlock(&entry->LOCK_parallel_entry);
goto err;
}
mysql_mutex_unlock(&entry->LOCK_parallel_entry);
/*