1
0
mirror of https://github.com/MariaDB/server.git synced 2025-09-02 09:41:40 +03:00

MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging

This patch fixes a bug in the error handling in parallel replication, when one
worker thread gets a failure and other worker threads processing later
transactions have to rollback and abort.

The problem was with the lifetime of group_commit_orderer objects (GCOs).
A GCO is freed when we register that its last event group has committed. This
relies on register_wait_for_prior_commit() and wait_for_prior_commit() to
ensure that the fact that T2 has committed implies that any earlier T1 has
also committed, and can thus no longer execute mark_start_commit().

However, in the error case, the code was skipping the
register_wait_for_prior_commit() and wait_for_prior_commit() calls. Thus
commit ordering was not guaranteed, and a GCO could be freed too early. Then a
later mark_start_commit() would reference deallocated GCO, which could lead to
lost wakeup (causing slave threads to hang) or other corruption.

This patch makes also the error case respect commit order. This way, also the
error case gets the GCO lifetime correct, and the hang no longer occurs.
This commit is contained in:
Kristian Nielsen
2015-03-30 14:33:44 +02:00
parent a4082918c8
commit 880f2273fd
5 changed files with 515 additions and 12 deletions

View File

@@ -1136,11 +1136,297 @@ SET GLOBAL debug_dbug=@old_dbug;
SET GLOBAL slave_parallel_threads=0;
SET GLOBAL slave_parallel_threads=10;
include/start_slave.inc
*** MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging ***
*** MDEV-7882: Excessive transaction retry in parallel replication ***
CREATE TABLE t7 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
CREATE TABLE t8 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=40;
SELECT @old_retries:=@@GLOBAL.slave_transaction_retries;
@old_retries:=@@GLOBAL.slave_transaction_retries
10
SET GLOBAL slave_transaction_retries= 5;
INSERT INTO t7 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 42;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
INSERT INTO t8 VALUES (1,1);
INSERT INTO t8 VALUES (2,2);
INSERT INTO t8 VALUES (3,3);
INSERT INTO t8 VALUES (4,4);
INSERT INTO t8 VALUES (5,5);
INSERT INTO t8 VALUES (6,6);
INSERT INTO t8 VALUES (7,7);
INSERT INTO t8 VALUES (8,8);
UPDATE t7 SET b=9 WHERE a=3;
UPDATE t7 SET b=10 WHERE a=3;
UPDATE t7 SET b=11 WHERE a=3;
INSERT INTO t8 VALUES (12,12);
INSERT INTO t8 VALUES (13,13);
UPDATE t7 SET b=14 WHERE a=3;
UPDATE t7 SET b=15 WHERE a=3;
INSERT INTO t8 VALUES (16,16);
UPDATE t7 SET b=17 WHERE a=3;
INSERT INTO t8 VALUES (18,18);
INSERT INTO t8 VALUES (19,19);
UPDATE t7 SET b=20 WHERE a=3;
INSERT INTO t8 VALUES (21,21);
UPDATE t7 SET b=22 WHERE a=3;
INSERT INTO t8 VALUES (23,24);
INSERT INTO t8 VALUES (24,24);
UPDATE t7 SET b=25 WHERE a=3;
INSERT INTO t8 VALUES (26,26);
UPDATE t7 SET b=27 WHERE a=3;
BEGIN;
INSERT INTO t8 VALUES (28,28);
INSERT INTO t8 VALUES (29,28), (30,28);
INSERT INTO t8 VALUES (31,28);
INSERT INTO t8 VALUES (32,28);
INSERT INTO t8 VALUES (33,28);
INSERT INTO t8 VALUES (34,28);
INSERT INTO t8 VALUES (35,28);
INSERT INTO t8 VALUES (36,28);
INSERT INTO t8 VALUES (37,28);
INSERT INTO t8 VALUES (38,28);
INSERT INTO t8 VALUES (39,28);
INSERT INTO t8 VALUES (40,28);
INSERT INTO t8 VALUES (41,28);
INSERT INTO t8 VALUES (42,28);
COMMIT;
SET @commit_id=43;
INSERT INTO t8 VALUES (43,43);
INSERT INTO t8 VALUES (44,44);
UPDATE t7 SET b=45 WHERE a=3;
INSERT INTO t8 VALUES (46,46);
INSERT INTO t8 VALUES (47,47);
UPDATE t7 SET b=48 WHERE a=3;
INSERT INTO t8 VALUES (49,49);
INSERT INTO t8 VALUES (50,50);
SET @commit_id=44;
INSERT INTO t8 VALUES (51,51);
INSERT INTO t8 VALUES (52,52);
UPDATE t7 SET b=53 WHERE a=3;
INSERT INTO t8 VALUES (54,54);
INSERT INTO t8 VALUES (55,55);
UPDATE t7 SET b=56 WHERE a=3;
INSERT INTO t8 VALUES (57,57);
UPDATE t7 SET b=58 WHERE a=3;
INSERT INTO t8 VALUES (58,58);
INSERT INTO t8 VALUES (59,59);
INSERT INTO t8 VALUES (60,60);
INSERT INTO t8 VALUES (61,61);
UPDATE t7 SET b=62 WHERE a=3;
INSERT INTO t8 VALUES (63,63);
INSERT INTO t8 VALUES (64,64);
INSERT INTO t8 VALUES (65,65);
INSERT INTO t8 VALUES (66,66);
UPDATE t7 SET b=67 WHERE a=3;
INSERT INTO t8 VALUES (68,68);
UPDATE t7 SET b=69 WHERE a=3;
UPDATE t7 SET b=70 WHERE a=3;
UPDATE t7 SET b=71 WHERE a=3;
INSERT INTO t8 VALUES (72,72);
UPDATE t7 SET b=73 WHERE a=3;
UPDATE t7 SET b=74 WHERE a=3;
UPDATE t7 SET b=75 WHERE a=3;
UPDATE t7 SET b=76 WHERE a=3;
INSERT INTO t8 VALUES (77,77);
UPDATE t7 SET b=78 WHERE a=3;
INSERT INTO t8 VALUES (79,79);
UPDATE t7 SET b=80 WHERE a=3;
INSERT INTO t8 VALUES (81,81);
UPDATE t7 SET b=82 WHERE a=3;
INSERT INTO t8 VALUES (83,83);
UPDATE t7 SET b=84 WHERE a=3;
SET @commit_id=45;
INSERT INTO t8 VALUES (85,85);
UPDATE t7 SET b=86 WHERE a=3;
INSERT INTO t8 VALUES (87,87);
SET @commit_id=46;
INSERT INTO t8 VALUES (88,88);
INSERT INTO t8 VALUES (89,89);
INSERT INTO t8 VALUES (90,90);
SET SESSION debug_dbug=@old_dbug;
INSERT INTO t8 VALUES (91,91);
INSERT INTO t8 VALUES (92,92);
INSERT INTO t8 VALUES (93,93);
INSERT INTO t8 VALUES (94,94);
INSERT INTO t8 VALUES (95,95);
INSERT INTO t8 VALUES (96,96);
INSERT INTO t8 VALUES (97,97);
INSERT INTO t8 VALUES (98,98);
INSERT INTO t8 VALUES (99,99);
SELECT * FROM t7 ORDER BY a;
a b
1 1
2 2
3 86
4 4
5 5
SELECT * FROM t8 ORDER BY a;
a b
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
12 12
13 13
16 16
18 18
19 19
21 21
23 24
24 24
26 26
28 28
29 28
30 28
31 28
32 28
33 28
34 28
35 28
36 28
37 28
38 28
39 28
40 28
41 28
42 28
43 43
44 44
46 46
47 47
49 49
50 50
51 51
52 52
54 54
55 55
57 57
58 58
59 59
60 60
61 61
63 63
64 64
65 65
66 66
68 68
72 72
77 77
79 79
81 81
83 83
85 85
87 87
88 88
89 89
90 90
91 91
92 92
93 93
94 94
95 95
96 96
97 97
98 98
99 99
include/save_master_gtid.inc
include/start_slave.inc
include/sync_with_master_gtid.inc
SELECT * FROM t7 ORDER BY a;
a b
1 1
2 2
3 86
4 4
5 5
SELECT * FROM t8 ORDER BY a;
a b
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
12 12
13 13
16 16
18 18
19 19
21 21
23 24
24 24
26 26
28 28
29 28
30 28
31 28
32 28
33 28
34 28
35 28
36 28
37 28
38 28
39 28
40 28
41 28
42 28
43 43
44 44
46 46
47 47
49 49
50 50
51 51
52 52
54 54
55 55
57 57
58 58
59 59
60 60
61 61
63 63
64 64
65 65
66 66
68 68
72 72
77 77
79 79
81 81
83 83
85 85
87 87
88 88
89 89
90 90
91 91
92 92
93 93
94 94
95 95
96 96
97 97
98 98
99 99
include/stop_slave.inc
SET GLOBAL slave_transaction_retries= @old_retries;
include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
SET DEBUG_SYNC= 'RESET';
DROP function foo;
DROP TABLE t1,t2,t3,t4,t5,t6;
DROP TABLE t1,t2,t3,t4,t5,t6,t7,t8;
SET DEBUG_SYNC= 'RESET';
include/rpl_end.inc