mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-12012/MDEV-11969 Can't remove GTIDs for a stale GTID Domain ID
As reported in MDEV-11969 "there's no way to ditch knowledge" about some domain that is no longer updated on a server. Besides being an annoyance that clutters output in the DBA console, stale domains can prevent the slave from connecting to the master, as MDEV-12012 witnesses. What domain is obsolete must be evaluated by the user (DBA) according to whether the domain info is still relevant and whether the domain will ever receive any update. This patch introduces a method to discard obsolete gtid domains from the server binlog state. The removal requires that no event group from such a domain be present in existing binlog files though. If there are any, the containing logs must first be PURGEd in order for FLUSH BINARY LOGS DELETE_DOMAIN_ID=(list-of-domains) to succeed. Otherwise the command returns an error. The list of obsolete domains can be computed through intersecting two sets - the earliest (first) binlog's Gtid_list and the current value of @@global.gtid_binlog_state - and extracting the domain id components from the intersection list items. The new DELETE_DOMAIN_ID featured FLUSH continues to rotate binlog omitting the deleted domains from the active binlog file's Gtid_list. Notice though that when the command is ineffective - that is, none of the domains requested for deletion exists in the binlog state - rotation does not occur. Obsolete domain deletion is not harmful for connected slaves as long as master side binlog files *purge* is synchronized with FLUSH-DELETE_DOMAIN_ID. The slaves must have the last event from purged files processed as usual, in order not to bump later into requesting a gtid from a file which was already gone. While the command is not replicated (as ordinary FLUSH BINARY LOGS is) slaves, even though having extra domains, won't suffer from reconnection errors thanks to master-slave gtid connection protocol allowing the master to be ignorant about a gtid domain.
Should such a slave be promoted to the master role at failover, it may run the ex-master's FLUSH BINARY LOGS DELETE_DOMAIN_ID=(list-of-domains) to clean its own binlog state. NOTES. suite/perfschema/r/start_server_low_digest.result is re-recorded as a consequence of internal parser code changes.
This commit is contained in:
30
mysql-test/suite/rpl/r/rpl_gtid_delete_domain.result
Normal file
30
mysql-test/suite/rpl/r/rpl_gtid_delete_domain.result
Normal file
@@ -0,0 +1,30 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
SET @@SESSION.gtid_domain_id=0;
|
||||
CREATE TABLE t (a INT);
|
||||
call mtr.add_suppression("connecting slave requested to start from.*which is not in the master's binlog");
|
||||
include/stop_slave.inc
|
||||
CHANGE MASTER TO master_use_gtid=slave_pos;
|
||||
SET @@SESSION.gtid_domain_id=11;
|
||||
SET @@SESSION.server_id=111;
|
||||
SET @@SESSION.gtid_seq_no=1;
|
||||
INSERT INTO t SET a=1;
|
||||
SET @save.gtid_slave_pos=@@global.gtid_slave_pos;
|
||||
SET @@global.gtid_slave_pos=concat(@@global.gtid_slave_pos, ",", 11, "-", 111, "-", 1 + 1);
|
||||
Warnings:
|
||||
Warning 1947 Specified GTID 0-1-1 conflicts with the binary log which contains a more recent GTID 0-2-2. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos.
|
||||
START SLAVE IO_THREAD;
|
||||
include/wait_for_slave_io_error.inc [errno=1236]
|
||||
FLUSH BINARY LOGS;
|
||||
PURGE BINARY LOGS TO 'master-bin.000002';;
|
||||
FLUSH BINARY LOGS DELETE_DOMAIN_ID=(11);
|
||||
include/start_slave.inc
|
||||
INSERT INTO t SET a=1;
|
||||
include/wait_for_slave_io_error.inc [errno=1236]
|
||||
FLUSH BINARY LOGS;
|
||||
PURGE BINARY LOGS TO 'master-bin.000004';;
|
||||
FLUSH BINARY LOGS DELETE_DOMAIN_ID=(11);
|
||||
include/start_slave.inc
|
||||
SET @@SESSION.gtid_domain_id=0;
|
||||
DROP TABLE t;
|
||||
include/rpl_end.inc
|
95
mysql-test/suite/rpl/t/rpl_gtid_delete_domain.test
Normal file
95
mysql-test/suite/rpl/t/rpl_gtid_delete_domain.test
Normal file
@@ -0,0 +1,95 @@
|
||||
# In case master's gtid binlog state is divergent from the slave's gtid_slave_pos
|
||||
# slave may not be able to connect.
|
||||
# For instance when slave is more updated in some of domains, see
|
||||
# MDEV-12012 as example, the master's state may require adjustment.
|
||||
# In a specific case of an "old" divergent domain, that is there
|
||||
# won't be any more event groups generated from it, the states can be
|
||||
# made compatible with wiping the problematic domain away. After that slave
|
||||
# becomes connectable.
|
||||
#
|
||||
# Notice that the slave applied gtid state is not really required to
|
||||
# be similarly cleaned in order for replication to flow.
|
||||
# However this could lead to an expected error when the master
|
||||
# resumes binlogging of such a domain, which the test demonstrates.
|
||||
|
||||
--source include/master-slave.inc
|
||||
|
||||
--connection master
|
||||
# enforce the default domain_id binlogging explicitly
|
||||
SET @@SESSION.gtid_domain_id=0;
|
||||
CREATE TABLE t (a INT);
|
||||
--sync_slave_with_master
|
||||
|
||||
--connection slave
|
||||
call mtr.add_suppression("connecting slave requested to start from.*which is not in the master's binlog");
|
||||
|
||||
--source include/stop_slave.inc
|
||||
CHANGE MASTER TO master_use_gtid=slave_pos;
|
||||
|
||||
--connection master
|
||||
# create extra gtid domains for binlog state
|
||||
--let $extra_domain_id=11
|
||||
--let $extra_domain_server_id=111
|
||||
--let $extra_gtid_seq_no=1
|
||||
--eval SET @@SESSION.gtid_domain_id=$extra_domain_id
|
||||
--eval SET @@SESSION.server_id=$extra_domain_server_id
|
||||
--eval SET @@SESSION.gtid_seq_no=$extra_gtid_seq_no
|
||||
INSERT INTO t SET a=1;
|
||||
|
||||
#
|
||||
# Set up the slave replication state as if slave knows more events from the extra
|
||||
# domain.
|
||||
#
|
||||
--connection slave
|
||||
SET @save.gtid_slave_pos=@@global.gtid_slave_pos;
|
||||
--eval SET @@global.gtid_slave_pos=concat(@@global.gtid_slave_pos, ",", $extra_domain_id, "-", $extra_domain_server_id, "-", $extra_gtid_seq_no + 1)
|
||||
|
||||
# unsuccessful attempt to start slave
|
||||
START SLAVE IO_THREAD;
|
||||
--let $slave_io_errno=1236
|
||||
--source include/wait_for_slave_io_error.inc
|
||||
|
||||
--connection master
|
||||
# adjust the master binlog state
|
||||
FLUSH BINARY LOGS;
|
||||
--let $purge_to_binlog= query_get_value(SHOW MASTER STATUS, File, 1)
|
||||
--eval PURGE BINARY LOGS TO '$purge_to_binlog';
|
||||
# with final removal of the extra domain
|
||||
--eval FLUSH BINARY LOGS DELETE_DOMAIN_ID=($extra_domain_id)
|
||||
|
||||
--connection slave
|
||||
# start the slave successfully
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection master
|
||||
# but the following gtid from the *extra* domain will break replication
|
||||
INSERT INTO t SET a=1;
|
||||
|
||||
# take note of the slave io thread error: the extra domain, dismissed
|
||||
# on the master when the slave connected, has tried to become active again;
|
||||
# the slave is to stop.
|
||||
--connection slave
|
||||
# The expected IO error number must be passed in $slave_io_errno, the same
# variable that is set before the first wait_for_slave_io_error.inc earlier
# in this test. Assigning a stray $errno here only worked because the earlier
# $slave_io_errno value was still in effect.
--let $slave_io_errno=1236
|
||||
--source include/wait_for_slave_io_error.inc
|
||||
|
||||
# let's apply the very same medicine
|
||||
--connection master
|
||||
FLUSH BINARY LOGS;
|
||||
--let $purge_to_binlog= query_get_value(SHOW MASTER STATUS, File, 1)
|
||||
--eval PURGE BINARY LOGS TO '$purge_to_binlog';
|
||||
# with final removal of the extra domain
|
||||
--eval FLUSH BINARY LOGS DELETE_DOMAIN_ID=($extra_domain_id)
|
||||
|
||||
--connection slave
|
||||
--source include/start_slave.inc
|
||||
|
||||
#
|
||||
# cleanup
|
||||
#
|
||||
--connection master
|
||||
SET @@SESSION.gtid_domain_id=0;
|
||||
DROP TABLE t;
|
||||
|
||||
sync_slave_with_master;
|
||||
|
||||
--source include/rpl_end.inc
|
Reference in New Issue
Block a user