mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-34998: master can stop responding after cluster vote to evict a node
After cluster vote to evict a node that failed a transaction, current master can't commit anymore. Error voting for joiner in the JOINED state was broken because the group-wide commit cut (implicit SUCCESS vote) was not taken into account when processing error vote request from the JOINED node. This commit adds 3 MTR tests to verify the fix in the galera library works as designed. Requires Galera library commit 91f0090a05e96c3cc353b80d961ede45cefb9279 (galera library version > 26.4.19). Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
committed by
Julius Goryavsky
parent
cb7e39b75b
commit
ec5068fe59
109
mysql-test/suite/galera/r/galera_vote_during_ist.result
Normal file
109
mysql-test/suite/galera/r/galera_vote_during_ist.result
Normal file
@@ -0,0 +1,109 @@
|
||||
connection node_4;
|
||||
connection node_3;
|
||||
connection node_2;
|
||||
connection node_1;
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
connection node_3;
|
||||
connection node_4;
|
||||
connection node_1;
|
||||
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
CREATE PROCEDURE p1(IN max INT)
|
||||
BEGIN
|
||||
DECLARE i INT;
|
||||
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
|
||||
SET i = 0;
|
||||
WHILE i < max DO
|
||||
INSERT IGNORE INTO t1 VALUES (DEFAULT);
|
||||
SET i = i + 1;
|
||||
END WHILE;
|
||||
END|
|
||||
CALL p1(130);
|
||||
connection node_4;
|
||||
Shutting down server 4...
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_2;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_3;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
Server 4 left the cluster
|
||||
connection node_1;
|
||||
CALL p1(130);
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
connection node_2;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
connection node_3;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
INSERT INTO t2 VALUES (DEFAULT);
|
||||
CALL p1(130);
|
||||
connection node_1;
|
||||
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
|
||||
Restarting server 4
|
||||
Wait for server 1 to become a donor
|
||||
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
|
||||
Server 1 got SST request from server 4
|
||||
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
|
||||
SET GLOBAL debug = "";
|
||||
SET DEBUG_SYNC='RESET';
|
||||
Waiting for server 4 to leave the cluster
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_2;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_3;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_4;
|
||||
Server 4 left the cluster, killing it...
|
||||
Killed server 4...
|
||||
Restarting server 4...
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_1;
|
||||
SELECT count(*) AS expect1_390 FROM t1;
|
||||
expect1_390
|
||||
390
|
||||
SELECT count(*) AS expect1_1 FROM t2;
|
||||
expect1_1
|
||||
1
|
||||
connection node_2;
|
||||
SELECT count(*) AS expect2_390 FROM t1;
|
||||
expect2_390
|
||||
390
|
||||
SELECT count(*) AS expect2_1 FROM t2;
|
||||
expect2_1
|
||||
1
|
||||
connection node_3;
|
||||
SELECT count(*) AS expect3_390 FROM t1;
|
||||
expect3_390
|
||||
390
|
||||
SELECT count(*) AS expect3_1 FROM t2;
|
||||
expect3_1
|
||||
1
|
||||
connection node_4;
|
||||
SELECT count(*) AS expect4_390 FROM t1;
|
||||
expect4_390
|
||||
390
|
||||
SELECT count(*) AS expect4_1 FROM t2;
|
||||
expect4_1
|
||||
1
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
||||
DROP PROCEDURE p1;
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
|
||||
CALL mtr.add_suppression("Failed to apply write set");
|
93
mysql-test/suite/galera/r/galera_vote_joined_apply.result
Normal file
93
mysql-test/suite/galera/r/galera_vote_joined_apply.result
Normal file
@@ -0,0 +1,93 @@
|
||||
connection node_4;
|
||||
connection node_3;
|
||||
connection node_2;
|
||||
connection node_1;
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
connection node_3;
|
||||
connection node_4;
|
||||
connection node_1;
|
||||
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
CREATE PROCEDURE p1(IN max INT)
|
||||
BEGIN
|
||||
DECLARE i INT;
|
||||
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
|
||||
SET i = 0;
|
||||
WHILE i < max DO
|
||||
INSERT IGNORE INTO t1 VALUES (DEFAULT);
|
||||
SET i = i + 1;
|
||||
END WHILE;
|
||||
END|
|
||||
CALL p1(130);
|
||||
connection node_4;
|
||||
Shutting down server 4...
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
|
||||
connection node_4;
|
||||
Restarting server 4...
|
||||
connection node_1;
|
||||
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
|
||||
Tables on server 1 flushed and locked for SST to server 4
|
||||
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
|
||||
SET GLOBAL debug = "";
|
||||
SET DEBUG_SYNC='RESET';
|
||||
Wait for the state snapshot to be copied to server 4
|
||||
SST script unlocked server 1
|
||||
connection node_1;
|
||||
CALL p1(130);
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
connection node_2;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
connection node_3;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
INSERT INTO t2 VALUES (DEFAULT);
|
||||
CALL p1(130);
|
||||
Waiting for server 4 to leave the cluster
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_2;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_4;
|
||||
Server 4 left the cluster, killing it...
|
||||
Killed server 4...
|
||||
Restarting server 4...
|
||||
DROP TABLE t2;
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_1;
|
||||
SELECT count(*) AS expect1_390 FROM t1;
|
||||
expect1_390
|
||||
390
|
||||
connection node_2;
|
||||
SELECT count(*) AS expect2_390 FROM t1;
|
||||
expect2_390
|
||||
390
|
||||
connection node_3;
|
||||
SELECT count(*) AS expect3_390 FROM t1;
|
||||
expect3_390
|
||||
390
|
||||
connection node_4;
|
||||
SELECT count(*) AS expect4_390 FROM t1;
|
||||
expect4_390
|
||||
390
|
||||
DROP TABLE t1;
|
||||
DROP PROCEDURE p1;
|
||||
connection node_4;
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus");
|
||||
CALL mtr.add_suppression("Failed to apply write set: gtid:");
|
101
mysql-test/suite/galera/r/galera_vote_joined_skip.result
Normal file
101
mysql-test/suite/galera/r/galera_vote_joined_skip.result
Normal file
@@ -0,0 +1,101 @@
|
||||
connection node_4;
|
||||
connection node_3;
|
||||
connection node_2;
|
||||
connection node_1;
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
connection node_3;
|
||||
connection node_4;
|
||||
connection node_1;
|
||||
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
CREATE PROCEDURE p1(IN max INT)
|
||||
BEGIN
|
||||
DECLARE i INT;
|
||||
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
|
||||
SET i = 0;
|
||||
WHILE i < max DO
|
||||
INSERT IGNORE INTO t1 VALUES (DEFAULT);
|
||||
SET i = i + 1;
|
||||
END WHILE;
|
||||
END|
|
||||
CALL p1(130);
|
||||
connection node_4;
|
||||
Shutting down server 4...
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
|
||||
connection node_4;
|
||||
Restarting server 4...
|
||||
connection node_1;
|
||||
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
|
||||
Tables on server 1 flushed and locked for SST to server 4
|
||||
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
|
||||
SET GLOBAL debug = "";
|
||||
SET DEBUG_SYNC='RESET';
|
||||
Wait for the state snapshot to be copied to server 4
|
||||
SST script unlocked server 1
|
||||
connection node_1;
|
||||
CALL p1(130);
|
||||
connection node_3;
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
INSERT INTO t2 VALUES (DEFAULT);
|
||||
SET SESSION wsrep_on = OFF;
|
||||
connection node_1;
|
||||
CALL p1(130);
|
||||
Waiting for server 3 to leave the cluster
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_2;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_4;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_3;
|
||||
Server 3 left the cluster, killing it...
|
||||
Killed server 3.
|
||||
Restarting server 3...
|
||||
Waiting for server 3 to rejoin the cluster
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_3;
|
||||
sleeping for 20
|
||||
Waiting ready
|
||||
Server 3 restarted.
|
||||
connection node_1;
|
||||
SET SESSION wsrep_on = ON;
|
||||
SET SESSION wsrep_sync_wait = 15;
|
||||
connection node_1;
|
||||
SELECT count(*) AS expect1_390 FROM t1;
|
||||
expect1_390
|
||||
390
|
||||
connection node_2;
|
||||
SELECT count(*) AS expect2_390 FROM t1;
|
||||
expect2_390
|
||||
390
|
||||
connection node_3;
|
||||
SELECT count(*) AS expect3_390 FROM t1;
|
||||
expect3_390
|
||||
390
|
||||
connection node_4;
|
||||
SELECT count(*) AS expect4_390 FROM t1;
|
||||
expect4_390
|
||||
390
|
||||
DROP TABLE t1;
|
||||
DROP PROCEDURE p1;
|
||||
connection node_1;
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
connection node_2;
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
connection node_3;
|
||||
CALL mtr.add_suppression("Vote 0 \\(success\\) on .* is inconsistent with group");
|
||||
connection node_4;
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
20
mysql-test/suite/galera/t/galera_vote_during_ist.cnf
Normal file
20
mysql-test/suite/galera/t/galera_vote_during_ist.cnf
Normal file
@@ -0,0 +1,20 @@
|
||||
!include ../galera_4nodes.cnf
|
||||
|
||||
[mysqld]
|
||||
wsrep-ignore-apply-errors=0
|
||||
|
||||
[mysqld.1]
|
||||
wsrep_node_name='node_1'
|
||||
|
||||
[mysqld.2]
|
||||
wsrep_node_name='node_2'
|
||||
|
||||
[mysqld.3]
|
||||
wsrep_node_name='node_3'
|
||||
|
||||
[mysqld.4]
|
||||
wsrep_node_name='node_4'
|
||||
wsrep_sst_donor='node_1'
|
||||
|
||||
[ENV]
|
||||
galera_cluster_size=4
|
158
mysql-test/suite/galera/t/galera_vote_during_ist.test
Normal file
158
mysql-test/suite/galera/t/galera_vote_during_ist.test
Normal file
@@ -0,0 +1,158 @@
|
||||
#
|
||||
# Test a case where a joiner encounters an error during IST.
|
||||
# Instead of voting it should assume error and bail out.
|
||||
#
|
||||
|
||||
--source include/galera_cluster.inc
|
||||
--source include/big_test.inc
|
||||
--source include/have_debug_sync.inc
|
||||
|
||||
--let $node_1=node_1
|
||||
--let $node_2=node_2
|
||||
--let $node_3=node_3
|
||||
--let $node_4=node_4
|
||||
--source ../include/auto_increment_offset_save.inc
|
||||
|
||||
# Create table t1 and procedure p1 to generate writesets
|
||||
--connection node_1
|
||||
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
|
||||
DELIMITER |;
|
||||
CREATE PROCEDURE p1(IN max INT)
|
||||
BEGIN
|
||||
DECLARE i INT;
|
||||
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
|
||||
|
||||
SET i = 0;
|
||||
WHILE i < max DO
|
||||
INSERT IGNORE INTO t1 VALUES (DEFAULT);
|
||||
SET i = i + 1;
|
||||
END WHILE;
|
||||
END|
|
||||
DELIMITER ;|
|
||||
|
||||
CALL p1(130);
|
||||
|
||||
--connection node_4
|
||||
--echo Shutting down server 4...
|
||||
--let $node_4_server_id= `SELECT @@server_id`
|
||||
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
|
||||
--let $node_4_pid_file= `SELECT @@pid_file`
|
||||
--source include/shutdown_mysqld.inc
|
||||
|
||||
# Wait for node #4 to leave cluster
|
||||
--let $members = 3
|
||||
--connection node_1
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_2
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_3
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--echo Server 4 left the cluster
|
||||
|
||||
# Create some writesets for IST
|
||||
--connection node_1
|
||||
CALL p1(130);
|
||||
|
||||
# Create a writeset that node 4 won't be able to apply by creating a table
|
||||
# that won't be present in the replication stream
|
||||
--connection node_1
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
--connection node_2
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
--connection node_3
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
# This should cause error during IST
|
||||
INSERT INTO t2 VALUES (DEFAULT);
|
||||
|
||||
# make sure nodes 1,2,3 progress far enough for commit cut update
|
||||
CALL p1(130);
|
||||
|
||||
--connection node_1
|
||||
# prepare to stop SST donor thread when it receives a request from starting node #4
|
||||
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
|
||||
|
||||
--echo Restarting server 4
|
||||
# Need to use this form instead of start_mysqld.inc because the latter is blocking
|
||||
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
|
||||
|
||||
--echo Wait for server 1 to become a donor
|
||||
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
|
||||
--echo Server 1 got SST request from server 4
|
||||
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
|
||||
SET GLOBAL debug = "";
|
||||
SET DEBUG_SYNC='RESET';
|
||||
|
||||
#
|
||||
# After this point node #4 shall proceed to IST and bail out
|
||||
#
|
||||
|
||||
--echo Waiting for server 4 to leave the cluster
|
||||
--let $members = 3
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_2
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_3
|
||||
--source include/wsrep_wait_membership.inc
|
||||
|
||||
--connection node_4
|
||||
--echo Server 4 left the cluster, killing it...
|
||||
|
||||
# Kill the connected server
|
||||
--exec echo "wait" > $node_4_expect_file_name
|
||||
--let KILL_NODE_PIDFILE = $node_4_pid_file
|
||||
--perl
|
||||
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
|
||||
my $mysqld_pid = `cat $pid_filename`;
|
||||
chomp($mysqld_pid);
|
||||
system("kill -9 $mysqld_pid");
|
||||
exit(0);
|
||||
EOF
|
||||
--echo Killed server 4...
|
||||
--source include/wait_until_disconnected.inc
|
||||
--echo Restarting server 4...
|
||||
--source include/start_mysqld.inc
|
||||
--source include/galera_wait_ready.inc
|
||||
|
||||
# Confirm node #4 has rejoined
|
||||
--connection node_1
|
||||
--let $members = 4
|
||||
--source include/wsrep_wait_membership.inc
|
||||
|
||||
# Confirm that all is good and all nodes have identical data
|
||||
|
||||
--connection node_1
|
||||
SELECT count(*) AS expect1_390 FROM t1;
|
||||
SELECT count(*) AS expect1_1 FROM t2;
|
||||
|
||||
--connection node_2
|
||||
SELECT count(*) AS expect2_390 FROM t1;
|
||||
SELECT count(*) AS expect2_1 FROM t2;
|
||||
|
||||
--connection node_3
|
||||
SELECT count(*) AS expect3_390 FROM t1;
|
||||
SELECT count(*) AS expect3_1 FROM t2;
|
||||
|
||||
--connection node_4
|
||||
SELECT count(*) AS expect4_390 FROM t1;
|
||||
SELECT count(*) AS expect4_1 FROM t2;
|
||||
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
||||
DROP PROCEDURE p1;
|
||||
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
|
||||
CALL mtr.add_suppression("Failed to apply write set");
|
||||
|
||||
--source ../include/auto_increment_offset_restore.inc
|
21
mysql-test/suite/galera/t/galera_vote_joined_apply.cnf
Normal file
21
mysql-test/suite/galera/t/galera_vote_joined_apply.cnf
Normal file
@@ -0,0 +1,21 @@
|
||||
!include ../galera_4nodes.cnf
|
||||
|
||||
[mysqld]
|
||||
wsrep-ignore-apply-errors=0
|
||||
|
||||
[mysqld.1]
|
||||
wsrep_node_name='node_1'
|
||||
|
||||
[mysqld.2]
|
||||
wsrep_node_name='node_2'
|
||||
|
||||
[mysqld.3]
|
||||
wsrep_node_name='node_3'
|
||||
|
||||
[mysqld.4]
|
||||
wsrep_node_name='node_4'
|
||||
wsrep_sst_donor='node_1'
|
||||
|
||||
[ENV]
|
||||
galera_cluster_size=4
|
||||
MTR_SST_JOINER_DELAY=20
|
73
mysql-test/suite/galera/t/galera_vote_joined_apply.test
Normal file
73
mysql-test/suite/galera/t/galera_vote_joined_apply.test
Normal file
@@ -0,0 +1,73 @@
|
||||
#
|
||||
# Test a case where a vote happens in JOINED state after SST on a writeset
|
||||
# that should be applied.
|
||||
#
|
||||
|
||||
--source galera_vote_joined_begin.inc
|
||||
#
|
||||
# At this point state snapshot has been copied, node 1 is operational and
|
||||
# we have about 20 seconds while everything we do will go into the replication
|
||||
# queue on node 4 which it will have to apply on top of the snapshot.
|
||||
#
|
||||
|
||||
# Increase replication queue on node_4
|
||||
--connection node_1
|
||||
CALL p1(130);
|
||||
|
||||
# Create a writeset that node 4 won't be able to apply by creating a table
|
||||
# that won't be present in the replication stream
|
||||
--connection node_1
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
--connection node_2
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
--connection node_3
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
# This should cause node #4 to initiate a vote and leave the cluster
|
||||
INSERT INTO t2 VALUES (DEFAULT);
|
||||
|
||||
# make sure nodes 1,2,3 progress far enough for commit cut update
|
||||
CALL p1(130);
|
||||
|
||||
--echo Waiting for server 4 to leave the cluster
|
||||
--let $members = 3
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_2
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_1
|
||||
--source include/wsrep_wait_membership.inc
|
||||
|
||||
--connection node_4
|
||||
--echo Server 4 left the cluster, killing it...
|
||||
# Kill the connected server
|
||||
--exec echo "wait" > $node_4_expect_file_name
|
||||
--let KILL_NODE_PIDFILE = $node_4_pid_file
|
||||
--perl
|
||||
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
|
||||
my $mysqld_pid = `cat $pid_filename`;
|
||||
chomp($mysqld_pid);
|
||||
system("kill -9 $mysqld_pid");
|
||||
exit(0);
|
||||
EOF
|
||||
--echo Killed server 4...
|
||||
--source include/wait_until_disconnected.inc
|
||||
--echo Restarting server 4...
|
||||
--source include/start_mysqld.inc
|
||||
--source include/galera_wait_ready.inc
|
||||
DROP TABLE t2;
|
||||
|
||||
--source galera_vote_joined_end.inc
|
||||
|
||||
--connection node_4
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus");
|
||||
CALL mtr.add_suppression("Failed to apply write set: gtid:");
|
74
mysql-test/suite/galera/t/galera_vote_joined_begin.inc
Normal file
74
mysql-test/suite/galera/t/galera_vote_joined_begin.inc
Normal file
@@ -0,0 +1,74 @@
|
||||
# The purpose of this file is to set up node 4 to require an SST which is artificially
|
||||
# prolonged, so that node 4 accumulates a sufficiently long replication queue.
|
||||
# The contents of the queue are controlled by the sourcing test files.
|
||||
|
||||
--source include/galera_cluster.inc
|
||||
--source include/big_test.inc
|
||||
--source include/have_debug_sync.inc
|
||||
|
||||
--let $node_1=node_1
|
||||
--let $node_2=node_2
|
||||
--let $node_3=node_3
|
||||
--let $node_4=node_4
|
||||
--source ../include/auto_increment_offset_save.inc
|
||||
|
||||
# Create table t1 and procedure p1 to generate writesets
|
||||
--connection node_1
|
||||
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
|
||||
DELIMITER |;
|
||||
CREATE PROCEDURE p1(IN max INT)
|
||||
BEGIN
|
||||
DECLARE i INT;
|
||||
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
|
||||
|
||||
SET i = 0;
|
||||
WHILE i < max DO
|
||||
INSERT IGNORE INTO t1 VALUES (DEFAULT);
|
||||
SET i = i + 1;
|
||||
END WHILE;
|
||||
END|
|
||||
DELIMITER ;|
|
||||
|
||||
# 130 events move the commit cut, which is essential for voting
|
||||
CALL p1(130);
|
||||
|
||||
--connection node_4
|
||||
--echo Shutting down server 4...
|
||||
--let $node_4_server_id= `SELECT @@server_id`
|
||||
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
|
||||
--let $node_4_pid_file= `SELECT @@pid_file`
|
||||
--source include/shutdown_mysqld.inc
|
||||
# Enforce SST: remove grastate.dat from node 4's data directory
|
||||
--exec rm -rf $MYSQLTEST_VARDIR/mysqld.4/data/grastate.dat
|
||||
|
||||
# Wait for node #4 to leave cluster
|
||||
--connection node_1
|
||||
--let $members = 3
|
||||
--source include/wsrep_wait_membership.inc
|
||||
|
||||
# prepare to stop SST donor thread when node is in donor state
|
||||
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
|
||||
|
||||
--connection node_4
|
||||
--echo Restarting server 4...
|
||||
# Need to use this form instead of start_mysqld.inc because the latter is blocking
|
||||
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
|
||||
|
||||
# Wait for node #1 to become a donor
|
||||
--connection node_1
|
||||
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
|
||||
--echo Tables on server 1 flushed and locked for SST to server 4
|
||||
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
|
||||
SET GLOBAL debug = "";
|
||||
SET DEBUG_SYNC='RESET';
|
||||
|
||||
--echo Wait for the state snapshot to be copied to server 4
|
||||
--source include/galera_wait_ready.inc
|
||||
--echo SST script unlocked server 1
|
||||
|
||||
#
|
||||
# At this point state snapshot has been copied, node 1 is operational and
|
||||
# we have about 20 seconds while everything we do will go into the replication
|
||||
# queue on node 4 which it will have to apply on top of the snapshot.
|
||||
#
|
33
mysql-test/suite/galera/t/galera_vote_joined_end.inc
Normal file
33
mysql-test/suite/galera/t/galera_vote_joined_end.inc
Normal file
@@ -0,0 +1,33 @@
|
||||
# Confirm node #4 has rejoined
|
||||
--connection node_1
|
||||
--let $members = 4
|
||||
--source include/wsrep_wait_membership.inc
|
||||
#DROP TABLE IF EXISTS t2;
|
||||
|
||||
# Confirm that all is good and all nodes have identical data
|
||||
|
||||
--connection node_1
|
||||
SELECT count(*) AS expect1_390 FROM t1;
|
||||
|
||||
#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows");
|
||||
#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno [0-9]*");
|
||||
|
||||
--connection node_2
|
||||
SELECT count(*) AS expect2_390 FROM t1;
|
||||
|
||||
#CALL mtr.add_suppression("mysqld: Can't find record in 't1'");
|
||||
#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows");
|
||||
#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno seqno [0-9]*");
|
||||
|
||||
--connection node_3
|
||||
SELECT count(*) AS expect3_390 FROM t1;
|
||||
|
||||
--connection node_4
|
||||
SELECT count(*) AS expect4_390 FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
DROP PROCEDURE p1;
|
||||
|
||||
#CALL mtr.add_suppression("inconsistent with group");
|
||||
|
||||
--source ../include/auto_increment_offset_restore.inc
|
21
mysql-test/suite/galera/t/galera_vote_joined_skip.cnf
Normal file
21
mysql-test/suite/galera/t/galera_vote_joined_skip.cnf
Normal file
@@ -0,0 +1,21 @@
|
||||
!include ../galera_4nodes.cnf
|
||||
|
||||
[mysqld]
|
||||
wsrep-ignore-apply-errors=0
|
||||
|
||||
[mysqld.1]
|
||||
wsrep_node_name='node_1'
|
||||
|
||||
[mysqld.2]
|
||||
wsrep_node_name='node_2'
|
||||
|
||||
[mysqld.3]
|
||||
wsrep_node_name='node_3'
|
||||
|
||||
[mysqld.4]
|
||||
wsrep_node_name='node_4'
|
||||
wsrep_sst_donor='node_1'
|
||||
|
||||
[ENV]
|
||||
galera_cluster_size=4
|
||||
MTR_SST_JOINER_DELAY=20
|
100
mysql-test/suite/galera/t/galera_vote_joined_skip.test
Normal file
100
mysql-test/suite/galera/t/galera_vote_joined_skip.test
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Test a case where a vote happens in JOINED state after SST on a writeset
|
||||
# that should be skipped. I.e. JOINED node should continue operation.
|
||||
#
|
||||
|
||||
--source galera_vote_joined_begin.inc
|
||||
#
|
||||
# At this point state snapshot has been copied, node 1 is operational and
|
||||
# we have about 20 seconds while everything we do will go into the replication
|
||||
# queue on node 4 which it will have to apply on top of the snapshot.
|
||||
#
|
||||
|
||||
# Increase replication queue on node_4
|
||||
--connection node_1
|
||||
CALL p1(130);
|
||||
|
||||
#
|
||||
# Create a writeset that node 4 won't be able to apply by making node 3
|
||||
# inconsistent
|
||||
#
|
||||
--connection node_3
|
||||
--let $node_3_server_id= `SELECT @@server_id`
|
||||
--let $node_3_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_3_server_id.expect
|
||||
--let $node_3_pid_file= `SELECT @@pid_file`
|
||||
SET SESSION wsrep_on = OFF;
|
||||
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
|
||||
SET SESSION wsrep_on = ON;
|
||||
|
||||
# This should cause nodes #1 and #2 to initiate a vote and kick node #3
|
||||
# out of the cluster; node #4 should recover the vote when it fails to apply
|
||||
# the event and continue
|
||||
INSERT INTO t2 VALUES (DEFAULT);
|
||||
SET SESSION wsrep_on = OFF;
|
||||
|
||||
# make sure nodes 1,2 progress far enough for commit cut update
|
||||
--connection node_1
|
||||
CALL p1(130);
|
||||
|
||||
--let $members = 3
|
||||
--echo Waiting for server 3 to leave the cluster
|
||||
--connection node_1
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_2
|
||||
--source include/wsrep_wait_membership.inc
|
||||
--connection node_4
|
||||
# need to wait for extra SST delay on joiner
|
||||
--sleep $MTR_SST_JOINER_DELAY
|
||||
--sleep $MTR_SST_JOINER_DELAY
|
||||
--enable_reconnect
|
||||
--let $wait_timeout = 60
|
||||
--source include/wsrep_wait_membership.inc
|
||||
|
||||
--connection node_3
|
||||
--echo Server 3 left the cluster, killing it...
|
||||
# Kill the connected server
|
||||
--exec echo "wait" > $node_3_expect_file_name
|
||||
--let KILL_NODE_PIDFILE = $node_3_pid_file
|
||||
--perl
|
||||
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
|
||||
my $mysqld_pid = `cat $pid_filename`;
|
||||
chomp($mysqld_pid);
|
||||
system("kill -9 $mysqld_pid");
|
||||
exit(0);
|
||||
EOF
|
||||
--echo Killed server 3.
|
||||
--source include/wait_until_disconnected.inc
|
||||
--echo Restarting server 3...
|
||||
--exec echo "restart:$start_mysqld_params" > $node_3_expect_file_name
|
||||
|
||||
--echo Waiting for server 3 to rejoin the cluster
|
||||
--connection node_1
|
||||
--let $members = 3
|
||||
--source include/wsrep_wait_membership.inc
|
||||
|
||||
--connection node_3
|
||||
--echo sleeping for $MTR_SST_JOINER_DELAY
|
||||
# need to wait for extra SST delay on joiner
|
||||
--sleep $MTR_SST_JOINER_DELAY
|
||||
--sleep $MTR_SST_JOINER_DELAY
|
||||
--echo Waiting ready
|
||||
--enable_reconnect
|
||||
--source include/galera_wait_ready.inc
|
||||
--echo Server 3 restarted.
|
||||
|
||||
--source galera_vote_joined_end.inc
|
||||
|
||||
--connection node_1
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
|
||||
--connection node_2
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
||||
|
||||
--connection node_3
|
||||
CALL mtr.add_suppression("Vote 0 \\(success\\) on .* is inconsistent with group");
|
||||
|
||||
--connection node_4
|
||||
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
|
||||
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
|
Reference in New Issue
Block a user