
MDEV-34998: master can stop responding after cluster vote to evict a node

After the cluster votes to evict a node that failed a transaction,
the current master can no longer commit.

Error voting for a joiner in the JOINED state was broken because the
group-wide commit cut (an implicit SUCCESS vote) was not taken into
account when processing an error vote request from the JOINED node.
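
To illustrate the reasoning above, here is a minimal, hypothetical C++ sketch of counting the group-wide commit cut as an implicit SUCCESS vote when an error vote arrives from a JOINED node. It is not the Galera implementation; the names (VoteTally, group_commit_cut, joiner_lost_vote) and the exact tallying rule are illustrative assumptions only.

#include <cstdint>
#include <iostream>

// Hypothetical sketch, not the actual Galera code.
// A member whose commit cut has advanced past a seqno has, in effect,
// voted SUCCESS for that write set even without an explicit vote message.
struct VoteTally
{
    int64_t group_commit_cut; // highest seqno known to be committed group-wide
    int     group_size;       // number of voting members

    // Called when a JOINED node reports an apply error for 'seqno'.
    // Returns true if the joiner lost the vote and must leave the cluster.
    bool joiner_lost_vote(int64_t seqno, int explicit_error_votes) const
    {
        int implicit_success_votes = 0;
        if (seqno <= group_commit_cut)
        {
            // Everyone except the erroring node(s) implicitly voted SUCCESS.
            implicit_success_votes = group_size - explicit_error_votes;
        }
        // Ignoring the implicit votes (the pre-fix behaviour) would leave
        // the voting round unresolved and the master unable to commit.
        return implicit_success_votes > explicit_error_votes;
    }
};

int main()
{
    VoteTally tally{/* group_commit_cut */ 390, /* group_size */ 4};
    // Joiner fails to apply seqno 300, which the group has already committed:
    std::cout << std::boolalpha
              << tally.joiner_lost_vote(300, 1) << '\n'; // prints "true"
}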

This commit adds 3 MTR tests to verify that the fix in the Galera
library works as designed.

Requires Galera library commit 91f0090a05e96c3cc353b80d961ede45cefb9279
(Galera library version > 26.4.19).

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
Authored by Alexey Yurchenko on 2024-06-07 00:52:29 +03:00, committed by Julius Goryavsky
parent cb7e39b75b, commit ec5068fe59
12 changed files with 810 additions and 0 deletions


@@ -0,0 +1,109 @@
connection node_4;
connection node_3;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_3;
connection node_4;
connection node_1;
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
CALL p1(130);
connection node_4;
Shutting down server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
Server 4 left the cluster
connection node_1;
CALL p1(130);
connection node_1;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_2;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_3;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
INSERT INTO t2 VALUES (DEFAULT);
CALL p1(130);
connection node_1;
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
Restarting server 4
Wait for server 1 to become a donor
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
Waiting for server 4 to leave the cluster
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_4;
Server 4 left the cluster, killing it...
Killed server 4...
Restarting server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SELECT count(*) AS expect1_390 FROM t1;
expect1_390
390
SELECT count(*) AS expect1_1 FROM t2;
expect1_1
1
connection node_2;
SELECT count(*) AS expect2_390 FROM t1;
expect2_390
390
SELECT count(*) AS expect2_1 FROM t2;
expect2_1
1
connection node_3;
SELECT count(*) AS expect3_390 FROM t1;
expect3_390
390
SELECT count(*) AS expect3_1 FROM t2;
expect3_1
1
connection node_4;
SELECT count(*) AS expect4_390 FROM t1;
expect4_390
390
SELECT count(*) AS expect4_1 FROM t2;
expect4_1
1
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");


@@ -0,0 +1,93 @@
connection node_4;
connection node_3;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_3;
connection node_4;
connection node_1;
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
CALL p1(130);
connection node_4;
Shutting down server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
connection node_4;
Restarting server 4...
connection node_1;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
Tables on server 1 flushed and locked for SST to server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
Wait for the state snapshot to be copied to server 4
SST script unlocked server 1
connection node_1;
CALL p1(130);
connection node_1;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_2;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_3;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
INSERT INTO t2 VALUES (DEFAULT);
CALL p1(130);
Waiting for server 4 to leave the cluster
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_4;
Server 4 left the cluster, killing it...
Killed server 4...
Restarting server 4...
DROP TABLE t2;
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SELECT count(*) AS expect1_390 FROM t1;
expect1_390
390
connection node_2;
SELECT count(*) AS expect2_390 FROM t1;
expect2_390
390
connection node_3;
SELECT count(*) AS expect3_390 FROM t1;
expect3_390
390
connection node_4;
SELECT count(*) AS expect4_390 FROM t1;
expect4_390
390
DROP TABLE t1;
DROP PROCEDURE p1;
connection node_4;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus");
CALL mtr.add_suppression("Failed to apply write set: gtid:");


@@ -0,0 +1,101 @@
connection node_4;
connection node_3;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_3;
connection node_4;
connection node_1;
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
CALL p1(130);
connection node_4;
Shutting down server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
connection node_4;
Restarting server 4...
connection node_1;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
Tables on server 1 flushed and locked for SST to server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
Wait for the state snapshot to be copied to server 4
SST script unlocked server 1
connection node_1;
CALL p1(130);
connection node_3;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
INSERT INTO t2 VALUES (DEFAULT);
SET SESSION wsrep_on = OFF;
connection node_1;
CALL p1(130);
Waiting for server 3 to leave the cluster
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_4;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
Server 3 left the cluster, killing it...
Killed server 3.
Restarting server 3...
Waiting for server 3 to rejoin the cluster
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
sleeping for 20
Waiting ready
Server 3 restarted.
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SELECT count(*) AS expect1_390 FROM t1;
expect1_390
390
connection node_2;
SELECT count(*) AS expect2_390 FROM t1;
expect2_390
390
connection node_3;
SELECT count(*) AS expect3_390 FROM t1;
expect3_390
390
connection node_4;
SELECT count(*) AS expect4_390 FROM t1;
expect4_390
390
DROP TABLE t1;
DROP PROCEDURE p1;
connection node_1;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
connection node_2;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
connection node_3;
CALL mtr.add_suppression("Vote 0 \\(success\\) on .* is inconsistent with group");
connection node_4;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");


@@ -0,0 +1,20 @@
!include ../galera_4nodes.cnf
[mysqld]
wsrep-ignore-apply-errors=0
[mysqld.1]
wsrep_node_name='node_1'
[mysqld.2]
wsrep_node_name='node_2'
[mysqld.3]
wsrep_node_name='node_3'
[mysqld.4]
wsrep_node_name='node_4'
wsrep_sst_donor='node_1'
[ENV]
galera_cluster_size=4


@@ -0,0 +1,158 @@
#
# Test a case where a joiner encounters an error during IST.
# Instead of voting, it should assume the error and bail out.
#
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug_sync.inc
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc
# create table t1 and procedure p1 to generate writesets
--connection node_1
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
DELIMITER ;|
CALL p1(130);
--connection node_4
--echo Shutting down server 4...
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
# Wait for node #4 to leave cluster
--let $members = 3
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--echo Server 4 left the cluster
# Create some writesets for IST
--connection node_1
CALL p1(130);
# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_2
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_3
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause an error during IST
INSERT INTO t2 VALUES (DEFAULT);
# make sure nodes 1,2,3 progress far enough for commit cut update
CALL p1(130);
--connection node_1
# prepare to stop SST donor thread when it receives a request from starting node #4
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
--echo Restarting server 4
# Need to use this form instead of start_mysqld.inc because the latter is blocking
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
--echo Wait for server 1 to become a donor
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
--echo Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
#
# After this point node #4 shall proceed to IST and bail out
#
--echo Waiting for server 4 to leave the cluster
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--connection node_4
--echo Server 4 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc
--echo Restarting server 4...
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc
# Confirm node #4 has rejoined
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
# Confirm that all is good and all nodes have identical data
--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
SELECT count(*) AS expect1_1 FROM t2;
--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
SELECT count(*) AS expect2_1 FROM t2;
--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
SELECT count(*) AS expect3_1 FROM t2;
--connection node_4
SELECT count(*) AS expect4_390 FROM t1;
SELECT count(*) AS expect4_1 FROM t2;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");
--source ../include/auto_increment_offset_restore.inc


@@ -0,0 +1,21 @@
!include ../galera_4nodes.cnf
[mysqld]
wsrep-ignore-apply-errors=0
[mysqld.1]
wsrep_node_name='node_1'
[mysqld.2]
wsrep_node_name='node_2'
[mysqld.3]
wsrep_node_name='node_3'
[mysqld.4]
wsrep_node_name='node_4'
wsrep_sst_donor='node_1'
[ENV]
galera_cluster_size=4
MTR_SST_JOINER_DELAY=20


@@ -0,0 +1,73 @@
#
# Test a case where a vote happens in JOINED state after SST on a writeset
# that should be applied.
#
--source galera_vote_joined_begin.inc
#
# At this point the state snapshot has been copied, node 1 is operational and
# we have about 10 seconds during which everything we do will go into the replication
# queue on node 4, which it will have to apply on top of the snapshot.
#
# Increase replication queue on node_4
--connection node_1
CALL p1(130);
# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_2
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_3
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause node #4 to initiate a vote and leave the cluster
INSERT INTO t2 VALUES (DEFAULT);
# make sure nodes 1,2,3 progress far enough for commit cut update
CALL p1(130);
--echo Waiting for server 4 to leave the cluster
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_4
--echo Server 4 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc
--echo Restarting server 4...
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc
DROP TABLE t2;
--source galera_vote_joined_end.inc
--connection node_4
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus");
CALL mtr.add_suppression("Failed to apply write set: gtid:");


@@ -0,0 +1,74 @@
# The purpose of this file is to set up node 4 to require an SST which is
# artificially prolonged so that node 4 accumulates a sufficient replication queue.
# The contents of the queue are controlled by the sourcing test files.
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug_sync.inc
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc
# create table t1 and procedure p1 to generate writesets
--connection node_1
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
DELIMITER ;|
# 130 events move the commit cut, which is essential for voting
CALL p1(130);
--connection node_4
--echo Shutting down server 4...
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
# force SST
--exec rm -rf $MYSQLTEST_VARDIR/mysqld.4/data/grastate.dat
# Wait for node #4 to leave cluster
--connection node_1
--let $members = 3
--source include/wsrep_wait_membership.inc
# prepare to stop SST donor thread when node is in donor state
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
--connection node_4
--echo Restarting server 4...
# Need to use this form instead of start_mysqld.inc because the latter is blocking
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
# Wait for node #1 to become a donor
--connection node_1
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
--echo Tables on server 1 flushed and locked for SST to server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
--echo Wait for the state snapshot to be copied to server 4
--source include/galera_wait_ready.inc
--echo SST script unlocked server 1
#
# At this point the state snapshot has been copied, node 1 is operational and
# we have about 20 seconds during which everything we do will go into the replication
# queue on node 4, which it will have to apply on top of the snapshot.
#


@@ -0,0 +1,33 @@
# Confirm node #4 has rejoined
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
#DROP TABLE IF EXISTS t2;
# Confirm that all is good and all nodes have identical data
--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows");
#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno [0-9]*");
--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
#CALL mtr.add_suppression("mysqld: Can't find record in 't1'");
#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows");
#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno seqno [0-9]*");
--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
--connection node_4
SELECT count(*) AS expect4_390 FROM t1;
DROP TABLE t1;
DROP PROCEDURE p1;
#CALL mtr.add_suppression("inconsistent with group");
--source ../include/auto_increment_offset_restore.inc


@@ -0,0 +1,21 @@
!include ../galera_4nodes.cnf
[mysqld]
wsrep-ignore-apply-errors=0
[mysqld.1]
wsrep_node_name='node_1'
[mysqld.2]
wsrep_node_name='node_2'
[mysqld.3]
wsrep_node_name='node_3'
[mysqld.4]
wsrep_node_name='node_4'
wsrep_sst_donor='node_1'
[ENV]
galera_cluster_size=4
MTR_SST_JOINER_DELAY=20


@@ -0,0 +1,100 @@
#
# Test a case where a vote happens in JOINED state after SST on a writeset
# that should be skipped, i.e. the JOINED node should continue operating.
#
--source galera_vote_joined_begin.inc
#
# At this point the state snapshot has been copied, node 1 is operational and
# we have about 10 seconds during which everything we do will go into the replication
# queue on node 4, which it will have to apply on top of the snapshot.
#
# Increase replication queue on node_4
--connection node_1
CALL p1(130);
#
# Create a writeset that node 4 won't be able to apply by making node 3
# inconsistent
#
--connection node_3
--let $node_3_server_id= `SELECT @@server_id`
--let $node_3_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_3_server_id.expect
--let $node_3_pid_file= `SELECT @@pid_file`
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause nodes #1 and #2 to initiate a vote and kick node #3
# out of the cluster; node #4 should recover the vote when it fails to apply
# the event and continue
INSERT INTO t2 VALUES (DEFAULT);
SET SESSION wsrep_on = OFF;
# make sure nodes 1,2 progress far enough for commit cut update
--connection node_1
CALL p1(130);
--let $members = 3
--echo Waiting for server 3 to leave the cluster
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_4
# need to wait for extra SST delay on joiner
--sleep $MTR_SST_JOINER_DELAY
--sleep $MTR_SST_JOINER_DELAY
--enable_reconnect
--let $wait_timeout = 60
--source include/wsrep_wait_membership.inc
--connection node_3
--echo Server 3 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_3_expect_file_name
--let KILL_NODE_PIDFILE = $node_3_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 3.
--source include/wait_until_disconnected.inc
--echo Restarting server 3...
--exec echo "restart:$start_mysqld_params" > $node_3_expect_file_name
--echo Waiting for server 3 to rejoin the cluster
--connection node_1
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_3
--echo sleeping for $MTR_SST_JOINER_DELAY
# need to wait for extra SST delay on joiner
--sleep $MTR_SST_JOINER_DELAY
--sleep $MTR_SST_JOINER_DELAY
--echo Waiting ready
--enable_reconnect
--source include/galera_wait_ready.inc
--echo Server 3 restarted.
--source galera_vote_joined_end.inc
--connection node_1
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
--connection node_2
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
--connection node_3
CALL mtr.add_suppression("Vote 0 \\(success\\) on .* is inconsistent with group");
--connection node_4
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");


@@ -915,6 +915,13 @@ EOF
fi
fi
# Delay for MTR tests if needed to simulate long SST
if [ ${MTR_SST_JOINER_DELAY:=0} -gt 0 ]
then
wsrep_log_info "Sleeping $MTR_SST_JOINER_DELAY seconds for MTR test"
sleep $MTR_SST_JOINER_DELAY
fi
# Remove special tags from the magic file, and from the output:
coords=$(head -n1 "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"