mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-4906: When event apply fails, next SQL thread start erroneously commits the failing GTID to gtid_slave_pos
When a GTID event is executed, we remember the GTID it contains so that, once the entire event group has been applied, we can commit that GTID to gtid_slave_pos. However, if the event group failed to apply due to some error and the SQL thread aborted, the code did not correctly clear the remembered GTID. Thus, when the SQL thread was restarted, the stale GTID of the failed event group was incorrectly written to gtid_slave_pos when the initial rotate event was executed, corrupting the GTID position.
This commit is contained in:
@ -173,6 +173,33 @@ a
|
|||||||
SET sql_log_bin=0;
|
SET sql_log_bin=0;
|
||||||
CALL mtr.add_suppression("Slave: Could not update replication slave gtid state");
|
CALL mtr.add_suppression("Slave: Could not update replication slave gtid state");
|
||||||
SET sql_log_bin=1;
|
SET sql_log_bin=1;
|
||||||
|
*** MDEV-4906: When event apply fails, next SQL thread start errorneously commits the failing GTID to gtid_slave_pos ***
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET sql_log_bin=0;
|
||||||
|
DELETE FROM t2;
|
||||||
|
SET sql_log_bin=1;
|
||||||
|
SET @old_format=@@binlog_format;
|
||||||
|
SET GLOBAL binlog_format='row';
|
||||||
|
include/start_slave.inc
|
||||||
|
SET @old_format=@@binlog_format;
|
||||||
|
SET binlog_format='row';
|
||||||
|
DELETE FROM t2;
|
||||||
|
SET binlog_format=@old_format;
|
||||||
|
include/wait_for_slave_sql_error.inc [errno=1032]
|
||||||
|
result
|
||||||
|
OK
|
||||||
|
STOP SLAVE IO_THREAD;
|
||||||
|
START SLAVE;
|
||||||
|
include/wait_for_slave_sql_error.inc [errno=1032]
|
||||||
|
result
|
||||||
|
OK
|
||||||
|
STOP SLAVE IO_THREAD;
|
||||||
|
SET sql_log_bin=0;
|
||||||
|
INSERT INTO t2 VALUES (1);
|
||||||
|
CALL mtr.add_suppression("Slave: Can't find record in 't2' Error_code: 1032");
|
||||||
|
SET sql_log_bin=1;
|
||||||
|
include/start_slave.inc
|
||||||
|
SET GLOBAL binlog_format=@old_format;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
DROP TABLE t2;
|
DROP TABLE t2;
|
||||||
include/rpl_end.inc
|
include/rpl_end.inc
|
||||||
|
@ -230,6 +230,57 @@ CALL mtr.add_suppression("Slave: Could not update replication slave gtid state")
|
|||||||
SET sql_log_bin=1;
|
SET sql_log_bin=1;
|
||||||
|
|
||||||
|
|
||||||
|
--echo *** MDEV-4906: When event apply fails, next SQL thread start errorneously commits the failing GTID to gtid_slave_pos ***
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET sql_log_bin=0;
|
||||||
|
DELETE FROM t2;
|
||||||
|
SET sql_log_bin=1;
|
||||||
|
SET @old_format=@@binlog_format;
|
||||||
|
SET GLOBAL binlog_format='row';
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
--connection master
|
||||||
|
SET @old_format=@@binlog_format;
|
||||||
|
SET binlog_format='row';
|
||||||
|
--let $gtid_pos1=`SELECT @@GLOBAL.gtid_binlog_pos`
|
||||||
|
DELETE FROM t2;
|
||||||
|
SET binlog_format=@old_format;
|
||||||
|
--save_master_pos
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
--let $slave_sql_errno= 1032
|
||||||
|
--source include/wait_for_slave_sql_error.inc
|
||||||
|
# Disable query to avoid result file update if precise GTID value changes.
|
||||||
|
--disable_query_log
|
||||||
|
SET @x=@@GLOBAL.gtid_slave_pos;
|
||||||
|
eval SELECT IF(@x='$gtid_pos1', "OK", CONCAT("ERROR: expected $gtid_pos1 got ", @x)) AS result;
|
||||||
|
--enable_query_log
|
||||||
|
|
||||||
|
# The bug was that upon restarting the SQL thread, the GTID for the
|
||||||
|
# failing event group was not cleared, so we would update it in the
|
||||||
|
# gtid_slave_pos as part of the first rotate event, corrupting the
|
||||||
|
# replication.
|
||||||
|
STOP SLAVE IO_THREAD;
|
||||||
|
START SLAVE;
|
||||||
|
--let $slave_sql_errno= 1032
|
||||||
|
--source include/wait_for_slave_sql_error.inc
|
||||||
|
# Disable query to avoid result file update if precise GTID value changes.
|
||||||
|
--disable_query_log
|
||||||
|
SET @x=@@GLOBAL.gtid_slave_pos;
|
||||||
|
eval SELECT IF(@x='$gtid_pos1', "OK", CONCAT("ERROR: expected $gtid_pos1 got ", @x)) AS result;
|
||||||
|
--enable_query_log
|
||||||
|
|
||||||
|
STOP SLAVE IO_THREAD;
|
||||||
|
SET sql_log_bin=0;
|
||||||
|
INSERT INTO t2 VALUES (1);
|
||||||
|
CALL mtr.add_suppression("Slave: Can't find record in 't2' Error_code: 1032");
|
||||||
|
SET sql_log_bin=1;
|
||||||
|
--source include/start_slave.inc
|
||||||
|
--sync_with_master
|
||||||
|
SET GLOBAL binlog_format=@old_format;
|
||||||
|
|
||||||
--connection master
|
--connection master
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
DROP TABLE t2;
|
DROP TABLE t2;
|
||||||
|
@ -3160,6 +3160,14 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
|
|||||||
DBUG_RETURN(2);
|
DBUG_RETURN(2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Make sure we do not erroneously update gtid_slave_pos with a lingering
|
||||||
|
GTID from this failed event group (MDEV-4906).
|
||||||
|
*/
|
||||||
|
rli->gtid_sub_id= 0;
|
||||||
|
}
|
||||||
|
|
||||||
DBUG_RETURN(exec_res ? 1 : 0);
|
DBUG_RETURN(exec_res ? 1 : 0);
|
||||||
}
|
}
|
||||||
@ -4094,6 +4102,7 @@ pthread_handler_t handle_slave_sql(void *arg)
|
|||||||
rli->trans_retries= 0; // start from "no error"
|
rli->trans_retries= 0; // start from "no error"
|
||||||
DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
|
DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
|
||||||
|
|
||||||
|
rli->gtid_sub_id= 0;
|
||||||
if (init_relay_log_pos(rli,
|
if (init_relay_log_pos(rli,
|
||||||
rli->group_relay_log_name,
|
rli->group_relay_log_name,
|
||||||
rli->group_relay_log_pos,
|
rli->group_relay_log_pos,
|
||||||
|
Reference in New Issue
Block a user