diff --git a/mysql-test/suite/rpl/r/rpl_gtid_errorhandling.result b/mysql-test/suite/rpl/r/rpl_gtid_errorhandling.result
index 7597813bcb1..98a8c0b2d87 100644
--- a/mysql-test/suite/rpl/r/rpl_gtid_errorhandling.result
+++ b/mysql-test/suite/rpl/r/rpl_gtid_errorhandling.result
@@ -173,6 +173,33 @@ a
 SET sql_log_bin=0;
 CALL mtr.add_suppression("Slave: Could not update replication slave gtid state");
 SET sql_log_bin=1;
+*** MDEV-4906: When event apply fails, next SQL thread start errorneously commits the failing GTID to gtid_slave_pos ***
+include/stop_slave.inc
+SET sql_log_bin=0;
+DELETE FROM t2;
+SET sql_log_bin=1;
+SET @old_format=@@GLOBAL.binlog_format;
+SET GLOBAL binlog_format='row';
+include/start_slave.inc
+SET @old_format=@@binlog_format;
+SET binlog_format='row';
+DELETE FROM t2;
+SET binlog_format=@old_format;
+include/wait_for_slave_sql_error.inc [errno=1032]
+result
+OK
+STOP SLAVE IO_THREAD;
+START SLAVE;
+include/wait_for_slave_sql_error.inc [errno=1032]
+result
+OK
+STOP SLAVE IO_THREAD;
+SET sql_log_bin=0;
+INSERT INTO t2 VALUES (1);
+CALL mtr.add_suppression("Slave: Can't find record in 't2' Error_code: 1032");
+SET sql_log_bin=1;
+include/start_slave.inc
+SET GLOBAL binlog_format=@old_format;
 DROP TABLE t1;
 DROP TABLE t2;
 include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_gtid_errorhandling.test b/mysql-test/suite/rpl/t/rpl_gtid_errorhandling.test
index 5743e1e4ec6..d2a7445c0bc 100644
--- a/mysql-test/suite/rpl/t/rpl_gtid_errorhandling.test
+++ b/mysql-test/suite/rpl/t/rpl_gtid_errorhandling.test
@@ -230,6 +230,58 @@ CALL mtr.add_suppression("Slave: Could not update replication slave gtid state")
 SET sql_log_bin=1;
 
 
+--echo *** MDEV-4906: When event apply fails, next SQL thread start errorneously commits the failing GTID to gtid_slave_pos ***
+
+--connection slave
+--source include/stop_slave.inc
+SET sql_log_bin=0;
+DELETE FROM t2;
+SET sql_log_bin=1;
+# Save the GLOBAL value: it is the global binlog_format that is changed here and restored at the end of this test case.
+SET @old_format=@@GLOBAL.binlog_format;
+SET GLOBAL binlog_format='row';
+--source include/start_slave.inc
+
+--connection master
+SET @old_format=@@binlog_format;
+SET binlog_format='row';
+--let $gtid_pos1=`SELECT @@GLOBAL.gtid_binlog_pos`
+DELETE FROM t2;
+SET binlog_format=@old_format;
+--save_master_pos
+
+--connection slave
+--let $slave_sql_errno= 1032
+--source include/wait_for_slave_sql_error.inc
+# Disable query to avoid result file update if precise GTID value changes.
+--disable_query_log
+SET @x=@@GLOBAL.gtid_slave_pos;
+eval SELECT IF(@x='$gtid_pos1', "OK", CONCAT("ERROR: expected $gtid_pos1 got ", @x)) AS result;
+--enable_query_log
+
+# The bug was that upon restarting the SQL thread, the GTID for the
+# failing event group was not cleared, so we would update it in the
+# gtid_slave_pos as part of the first rotate event, corrupting the
+# replication.
+STOP SLAVE IO_THREAD;
+START SLAVE;
+--let $slave_sql_errno= 1032
+--source include/wait_for_slave_sql_error.inc
+# Disable query to avoid result file update if precise GTID value changes.
+--disable_query_log
+SET @x=@@GLOBAL.gtid_slave_pos;
+eval SELECT IF(@x='$gtid_pos1', "OK", CONCAT("ERROR: expected $gtid_pos1 got ", @x)) AS result;
+--enable_query_log
+
+STOP SLAVE IO_THREAD;
+SET sql_log_bin=0;
+INSERT INTO t2 VALUES (1);
+CALL mtr.add_suppression("Slave: Can't find record in 't2' Error_code: 1032");
+SET sql_log_bin=1;
+--source include/start_slave.inc
+--sync_with_master
+SET GLOBAL binlog_format=@old_format;
+
 --connection master
 DROP TABLE t1;
 DROP TABLE t2;
diff --git a/sql/slave.cc b/sql/slave.cc
index 77737616deb..b10d1a17c23 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3160,6 +3160,14 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
       DBUG_RETURN(2);
     }
   }
+  else
+  {
+    /*
+      Make sure we do not erroneously update gtid_slave_pos with a lingering
+      GTID from this failed event group (MDEV-4906).
+    */
+    rli->gtid_sub_id= 0;
+  }
 
   DBUG_RETURN(exec_res ? 1 : 0);
 }
@@ -4094,6 +4102,7 @@ pthread_handler_t handle_slave_sql(void *arg)
   rli->trans_retries= 0; // start from "no error"
   DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
 
+  rli->gtid_sub_id= 0;
   if (init_relay_log_pos(rli,
                          rli->group_relay_log_name,
                          rli->group_relay_log_pos,