mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Merge MDEV-8031 into 10.1
This commit is contained in:
@@ -1591,7 +1591,7 @@ a b
|
|||||||
120 0
|
120 0
|
||||||
121 0
|
121 0
|
||||||
include/stop_slave.inc
|
include/stop_slave.inc
|
||||||
SET GLOBAL debug_dbug= @old_debug;
|
SET GLOBAL debug_dbug= @old_dbug;
|
||||||
include/start_slave.inc
|
include/start_slave.inc
|
||||||
*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
|
*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
|
||||||
include/stop_slave.inc
|
include/stop_slave.inc
|
||||||
@@ -1620,7 +1620,88 @@ a b
|
|||||||
130 0
|
130 0
|
||||||
131 0
|
131 0
|
||||||
include/stop_slave.inc
|
include/stop_slave.inc
|
||||||
SET GLOBAL debug_dbug= @old_debug;
|
SET GLOBAL debug_dbug= @old_dbug;
|
||||||
|
include/start_slave.inc
|
||||||
|
*** MDEV-8031: Parallel replication stops on "connection killed" error (probably incorrectly handled deadlock kill) ***
|
||||||
|
INSERT INTO t3 VALUES (201,0), (202,0);
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug= '+d,inject_mdev8031';
|
||||||
|
SET @old_dbug= @@SESSION.debug_dbug;
|
||||||
|
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||||
|
SET @commit_id= 10200;
|
||||||
|
INSERT INTO t3 VALUES (203, 1);
|
||||||
|
INSERT INTO t3 VALUES (204, 1);
|
||||||
|
INSERT INTO t3 VALUES (205, 1);
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||||
|
SET SESSION debug_dbug=@old_dbug;
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
a b
|
||||||
|
201 3
|
||||||
|
202 4
|
||||||
|
203 4
|
||||||
|
204 4
|
||||||
|
205 3
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
include/start_slave.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
a b
|
||||||
|
201 3
|
||||||
|
202 4
|
||||||
|
203 4
|
||||||
|
204 4
|
||||||
|
205 3
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET GLOBAL debug_dbug= @old_dbug;
|
||||||
|
include/start_slave.inc
|
||||||
|
*** Check getting deadlock killed inside open_binlog() during retry. ***
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug= '+d,inject_retry_event_group_open_binlog_kill';
|
||||||
|
SET @old_max= @@GLOBAL.max_relay_log_size;
|
||||||
|
SET GLOBAL max_relay_log_size= 4096;
|
||||||
|
SET @old_dbug= @@SESSION.debug_dbug;
|
||||||
|
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||||
|
SET @commit_id= 10210;
|
||||||
|
Omit long queries that cause relaylog rotations and transaction retries...
|
||||||
|
SET SESSION debug_dbug=@old_dbug;
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
a b
|
||||||
|
201 6
|
||||||
|
202 8
|
||||||
|
203 7
|
||||||
|
204 7
|
||||||
|
205 5
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
include/start_slave.inc
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
a b
|
||||||
|
201 6
|
||||||
|
202 8
|
||||||
|
203 7
|
||||||
|
204 7
|
||||||
|
205 5
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET GLOBAL debug_dbug= @old_debg;
|
||||||
|
SET GLOBAL max_relay_log_size= @old_max;
|
||||||
include/start_slave.inc
|
include/start_slave.inc
|
||||||
include/stop_slave.inc
|
include/stop_slave.inc
|
||||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||||
|
@@ -2215,7 +2215,7 @@ SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
|||||||
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
||||||
|
|
||||||
--source include/stop_slave.inc
|
--source include/stop_slave.inc
|
||||||
SET GLOBAL debug_dbug= @old_debug;
|
SET GLOBAL debug_dbug= @old_dbug;
|
||||||
--source include/start_slave.inc
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
|
||||||
@@ -2262,10 +2262,119 @@ SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
|||||||
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
||||||
|
|
||||||
--source include/stop_slave.inc
|
--source include/stop_slave.inc
|
||||||
SET GLOBAL debug_dbug= @old_debug;
|
SET GLOBAL debug_dbug= @old_dbug;
|
||||||
--source include/start_slave.inc
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
|
||||||
|
--echo *** MDEV-8031: Parallel replication stops on "connection killed" error (probably incorrectly handled deadlock kill) ***
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
INSERT INTO t3 VALUES (201,0), (202,0);
|
||||||
|
--source include/save_master_gtid.inc
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug= '+d,inject_mdev8031';
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
# We artificially create a situation that hopefully resembles the original
|
||||||
|
# bug which was only seen "in the wild", and only once.
|
||||||
|
# Setup a fake group commit with lots of conflicts that will lead to deadloc
|
||||||
|
# kill. The slave DBUG injection causes the slave to be deadlock killed at
|
||||||
|
# a particular point during the retry, and then later do a small sleep at
|
||||||
|
# another critical point where the prior transaction then has a chance to
|
||||||
|
# complete. Finally an extra KILL check catches an unhandled, lingering
|
||||||
|
# deadlock kill. So rather artificial, but at least it exercises the
|
||||||
|
# relevant code paths.
|
||||||
|
SET @old_dbug= @@SESSION.debug_dbug;
|
||||||
|
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||||
|
|
||||||
|
SET @commit_id= 10200;
|
||||||
|
INSERT INTO t3 VALUES (203, 1);
|
||||||
|
INSERT INTO t3 VALUES (204, 1);
|
||||||
|
INSERT INTO t3 VALUES (205, 1);
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||||
|
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||||
|
SET SESSION debug_dbug=@old_dbug;
|
||||||
|
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
--source include/save_master_gtid.inc
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--source include/start_slave.inc
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET GLOBAL debug_dbug= @old_dbug;
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
|
||||||
|
--echo *** Check getting deadlock killed inside open_binlog() during retry. ***
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET GLOBAL debug_dbug= '+d,inject_retry_event_group_open_binlog_kill';
|
||||||
|
SET @old_max= @@GLOBAL.max_relay_log_size;
|
||||||
|
SET GLOBAL max_relay_log_size= 4096;
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
SET @old_dbug= @@SESSION.debug_dbug;
|
||||||
|
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||||
|
|
||||||
|
--let $large= `SELECT REPEAT("*", 8192)`
|
||||||
|
SET @commit_id= 10210;
|
||||||
|
--echo Omit long queries that cause relaylog rotations and transaction retries...
|
||||||
|
--disable_query_log
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=205 /* $large */;
|
||||||
|
eval UPDATE t3 SET b=b+1 WHERE a=205 /* $large */;
|
||||||
|
--enable_query_log
|
||||||
|
SET SESSION debug_dbug=@old_dbug;
|
||||||
|
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
--source include/save_master_gtid.inc
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--source include/start_slave.inc
|
||||||
|
--source include/sync_with_master_gtid.inc
|
||||||
|
|
||||||
|
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET GLOBAL debug_dbug= @old_debg;
|
||||||
|
SET GLOBAL max_relay_log_size= @old_max;
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Clean up.
|
# Clean up.
|
||||||
--connection server_2
|
--connection server_2
|
||||||
--source include/stop_slave.inc
|
--source include/stop_slave.inc
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
#include "rpl_parallel.h"
|
#include "rpl_parallel.h"
|
||||||
#include "slave.h"
|
#include "slave.h"
|
||||||
#include "rpl_mi.h"
|
#include "rpl_mi.h"
|
||||||
|
#include "sql_parse.h"
|
||||||
#include "debug_sync.h"
|
#include "debug_sync.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -333,7 +334,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
|
|||||||
IO_CACHE rlog;
|
IO_CACHE rlog;
|
||||||
LOG_INFO linfo;
|
LOG_INFO linfo;
|
||||||
File fd= (File)-1;
|
File fd= (File)-1;
|
||||||
const char *errmsg= NULL;
|
const char *errmsg;
|
||||||
inuse_relaylog *ir= rgi->relay_log;
|
inuse_relaylog *ir= rgi->relay_log;
|
||||||
uint64 event_count;
|
uint64 event_count;
|
||||||
uint64 events_to_execute= rgi->retry_event_count;
|
uint64 events_to_execute= rgi->retry_event_count;
|
||||||
@@ -349,6 +350,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
|
|||||||
do_retry:
|
do_retry:
|
||||||
event_count= 0;
|
event_count= 0;
|
||||||
err= 0;
|
err= 0;
|
||||||
|
errmsg= NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
If we already started committing before getting the deadlock (or other
|
If we already started committing before getting the deadlock (or other
|
||||||
@@ -384,7 +386,16 @@ do_retry:
|
|||||||
*/
|
*/
|
||||||
if(thd->wait_for_commit_ptr)
|
if(thd->wait_for_commit_ptr)
|
||||||
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
||||||
|
DBUG_EXECUTE_IF("inject_mdev8031", {
|
||||||
|
/* Simulate that we get deadlock killed at this exact point. */
|
||||||
|
rgi->killed_for_retry= true;
|
||||||
|
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||||
|
thd->killed= KILL_CONNECTION;
|
||||||
|
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||||
|
});
|
||||||
rgi->cleanup_context(thd, 1);
|
rgi->cleanup_context(thd, 1);
|
||||||
|
thd->reset_killed();
|
||||||
|
thd->clear_error();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
If we retry due to a deadlock kill that occured during the commit step, we
|
If we retry due to a deadlock kill that occured during the commit step, we
|
||||||
@@ -428,10 +439,22 @@ do_retry:
|
|||||||
complete its commit.
|
complete its commit.
|
||||||
*/
|
*/
|
||||||
thd->clear_error();
|
thd->clear_error();
|
||||||
|
thd->reset_killed();
|
||||||
if(thd->wait_for_commit_ptr)
|
if(thd->wait_for_commit_ptr)
|
||||||
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
||||||
|
DBUG_EXECUTE_IF("inject_mdev8031", {
|
||||||
|
/* Inject a small sleep to give prior transaction a chance to commit. */
|
||||||
|
my_sleep(100000);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Let us clear any lingering deadlock kill one more time, here after
|
||||||
|
wait_for_prior_commit() has completed. This should rule out any
|
||||||
|
possibility of an old deadlock kill lingering on beyond this point.
|
||||||
|
*/
|
||||||
|
thd->reset_killed();
|
||||||
|
|
||||||
strmake_buf(log_name, ir->name);
|
strmake_buf(log_name, ir->name);
|
||||||
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
||||||
{
|
{
|
||||||
@@ -447,6 +470,14 @@ do_retry:
|
|||||||
err= 1;
|
err= 1;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
DBUG_EXECUTE_IF("inject_mdev8031", {
|
||||||
|
/* Simulate pending KILL caught in read_relay_log_description_event(). */
|
||||||
|
if (thd->check_killed()) {
|
||||||
|
thd->send_kill_message();
|
||||||
|
err= 1;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
});
|
||||||
my_b_seek(&rlog, cur_offset);
|
my_b_seek(&rlog, cur_offset);
|
||||||
|
|
||||||
do
|
do
|
||||||
@@ -469,7 +500,7 @@ do_retry:
|
|||||||
{
|
{
|
||||||
errmsg= "slave SQL thread aborted because of I/O error";
|
errmsg= "slave SQL thread aborted because of I/O error";
|
||||||
err= 1;
|
err= 1;
|
||||||
goto err;
|
goto check_retry;
|
||||||
}
|
}
|
||||||
if (rlog.error > 0)
|
if (rlog.error > 0)
|
||||||
{
|
{
|
||||||
@@ -498,10 +529,25 @@ do_retry:
|
|||||||
}
|
}
|
||||||
strmake_buf(log_name ,linfo.log_file_name);
|
strmake_buf(log_name ,linfo.log_file_name);
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("inject_retry_event_group_open_binlog_kill", {
|
||||||
|
if (retries < 2)
|
||||||
|
{
|
||||||
|
/* Simulate that we get deadlock killed during open_binlog(). */
|
||||||
|
mysql_reset_thd_for_next_command(thd);
|
||||||
|
rgi->killed_for_retry= true;
|
||||||
|
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||||
|
thd->killed= KILL_CONNECTION;
|
||||||
|
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||||
|
thd->send_kill_message();
|
||||||
|
fd= (File)-1;
|
||||||
|
err= 1;
|
||||||
|
goto check_retry;
|
||||||
|
}
|
||||||
|
});
|
||||||
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
||||||
{
|
{
|
||||||
err= 1;
|
err= 1;
|
||||||
goto err;
|
goto check_retry;
|
||||||
}
|
}
|
||||||
/* Loop to try again on the new log file. */
|
/* Loop to try again on the new log file. */
|
||||||
}
|
}
|
||||||
@@ -544,17 +590,22 @@ do_retry:
|
|||||||
if (retries == 0) err= dbug_simulate_tmp_error(rgi, thd););
|
if (retries == 0) err= dbug_simulate_tmp_error(rgi, thd););
|
||||||
DBUG_EXECUTE_IF("rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100",
|
DBUG_EXECUTE_IF("rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100",
|
||||||
err= dbug_simulate_tmp_error(rgi, thd););
|
err= dbug_simulate_tmp_error(rgi, thd););
|
||||||
if (err)
|
if (!err)
|
||||||
{
|
continue;
|
||||||
|
|
||||||
|
check_retry:
|
||||||
convert_kill_to_deadlock_error(rgi);
|
convert_kill_to_deadlock_error(rgi);
|
||||||
if (has_temporary_error(thd))
|
if (has_temporary_error(thd))
|
||||||
{
|
{
|
||||||
++retries;
|
++retries;
|
||||||
if (retries < slave_trans_retries)
|
if (retries < slave_trans_retries)
|
||||||
|
{
|
||||||
|
if (fd >= 0)
|
||||||
{
|
{
|
||||||
end_io_cache(&rlog);
|
end_io_cache(&rlog);
|
||||||
mysql_file_close(fd, MYF(MY_WME));
|
mysql_file_close(fd, MYF(MY_WME));
|
||||||
fd= (File)-1;
|
fd= (File)-1;
|
||||||
|
}
|
||||||
goto do_retry;
|
goto do_retry;
|
||||||
}
|
}
|
||||||
sql_print_error("Slave worker thread retried transaction %lu time(s) "
|
sql_print_error("Slave worker thread retried transaction %lu time(s) "
|
||||||
@@ -563,7 +614,7 @@ do_retry:
|
|||||||
slave_trans_retries);
|
slave_trans_retries);
|
||||||
}
|
}
|
||||||
goto err;
|
goto err;
|
||||||
}
|
|
||||||
} while (event_count < events_to_execute);
|
} while (event_count < events_to_execute);
|
||||||
|
|
||||||
err:
|
err:
|
||||||
|
Reference in New Issue
Block a user