diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index cf25f377142..094a63cf50a 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -294,6 +294,15 @@ The following specify which files/extra groups are read (specified before remain --group-concat-max-len=# The maximum length of the result of function GROUP_CONCAT() + --gtid-cleanup-batch-size=# + Normally does not need tuning. How many old rows must + accumulate in the mysql.gtid_slave_pos table before a + background job will be run to delete them. Can be + increased to reduce number of commits if using many + different engines with --gtid_pos_auto_engines, or to + reduce CPU overhead if using a huge number of different + gtid_domain_ids. Can be decreased to reduce number of old + rows in the table. --gtid-domain-id=# Used with global transaction ID to identify logically independent replication streams. When events can propagate through multiple parallel paths (for example @@ -1434,6 +1443,7 @@ gdb FALSE general-log FALSE getopt-prefix-matching FALSE group-concat-max-len 1048576 +gtid-cleanup-batch-size 64 gtid-domain-id 0 gtid-ignore-duplicates FALSE gtid-pos-auto-engines diff --git a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result index 00307a8c104..5dffdd9809c 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result @@ -16,36 +16,32 @@ INSERT INTO t1 VALUES (1); connection slave; connection slave; include/stop_slave.inc +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size; +SET GLOBAL gtid_cleanup_batch_size= 2; SET @old_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete"; SET sql_log_bin= 0; -CALL mtr.add_suppression("Can't find file"); +CALL mtr.add_suppression(" Error deleting old GTID row"); SET sql_log_bin= 1; include/start_slave.inc connection master; -INSERT INTO t1 
VALUES (2); connection slave; -include/wait_for_slave_sql_error.inc [errno=1942] -STOP SLAVE IO_THREAD; -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos -ORDER BY domain_id, sub_id DESC LIMIT 1; -domain_id server_id seq_no -0 1 3 +SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no +FROM mysql.gtid_slave_pos; +SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count)); +IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count)) +OK SET GLOBAL debug_dbug= @old_dbug; -include/start_slave.inc connection master; -INSERT INTO t1 VALUES (3); connection slave; connection slave; -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos -ORDER BY domain_id, sub_id DESC LIMIT 1; -domain_id server_id seq_no -0 1 4 -SELECT * FROM t1 ORDER BY i; -i -1 -2 -3 +SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*))) +FROM mysql.gtid_slave_pos +WHERE seq_no <= @pre_max_seq_no; +IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*))) +OK connection master; DROP TABLE t1; +connection slave; +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result index 1647b6c998c..50f24d56e9a 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result @@ -171,7 +171,7 @@ include/start_slave.inc *** MDEV-4692: mysql.gtid_slave_pos accumulates values for a domain *** SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id; domain_id COUNT(*) -0 2 +0 3 1 2 connection server_1; INSERT INTO t1 VALUES (11); @@ -179,7 +179,7 @@ connection server_2; FLUSH NO_WRITE_TO_BINLOG TABLES; SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id; domain_id COUNT(*) -0 2 
+0 4 1 2 include/start_slave.inc connection server_1; @@ -189,8 +189,8 @@ connection server_2; FLUSH NO_WRITE_TO_BINLOG TABLES; SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id; domain_id COUNT(*) -0 2 -1 2 +0 3 +1 1 *** MDEV-4650: show variables; ERROR 1946 (HY000): Failed to load replication slave GTID position *** connection server_2; SET sql_log_bin=0; diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result index ca202a66b0e..83343e52cab 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result @@ -12,6 +12,8 @@ SET GLOBAL slave_parallel_threads=10; CHANGE MASTER TO master_use_gtid=slave_pos; SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; SET GLOBAL slave_parallel_mode='optimistic'; +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size; +SET GLOBAL gtid_cleanup_batch_size= 1000000; connection server_1; INSERT INTO t1 VALUES(1,1); BEGIN; @@ -131,6 +133,11 @@ c 204 205 206 +SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*))) +FROM mysql.gtid_slave_pos; +IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*))) +OK +SET GLOBAL gtid_cleanup_batch_size=1; *** Test @@skip_parallel_replication. 
*** connection server_2; include/stop_slave.inc @@ -651,9 +658,10 @@ DROP TABLE t1, t2, t3; include/save_master_gtid.inc connection server_2; include/sync_with_master_gtid.inc -Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos -select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id; -count(4) <= 4 +SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size +FROM mysql.gtid_slave_pos; +COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size 1 +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size; connection server_1; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test index 1e5d1abbb3b..5c17653da8a 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test @@ -28,37 +28,79 @@ INSERT INTO t1 VALUES (1); # Inject an artificial error deleting entries, and check that the error handling code works. --connection slave --source include/stop_slave.inc +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size; +SET GLOBAL gtid_cleanup_batch_size= 2; SET @old_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete"; SET sql_log_bin= 0; -CALL mtr.add_suppression("Can't find file"); +CALL mtr.add_suppression(" Error deleting old GTID row"); SET sql_log_bin= 1; --source include/start_slave.inc --connection master -INSERT INTO t1 VALUES (2); +--disable_query_log +let $i = 20; +while ($i) { + eval INSERT INTO t1 VALUES ($i+10); + dec $i; +} +--enable_query_log +--save_master_pos --connection slave ---let $slave_sql_errno= 1942 ---source include/wait_for_slave_sql_error.inc -STOP SLAVE IO_THREAD; -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos - ORDER BY domain_id, sub_id DESC LIMIT 1; +--sync_with_master + +# Now wait for the slave background thread to try to delete old rows and +# hit the error injection. 
+--let _TEST_MYSQLD_ERROR_LOG=$MYSQLTEST_VARDIR/log/mysqld.2.err +--perl + open F, '<', $ENV{'_TEST_MYSQLD_ERROR_LOG'} or die; + outer: while (1) { + inner: while (<F>) { + last outer if / Error deleting old GTID row/; + } + # Easy way to do sub-second sleep without extra modules. + select(undef, undef, undef, 0.1); + } +EOF + +# Since we injected error in the cleanup code, the rows should remain in +# mysql.gtid_slave_pos. Check that we have at least 20 (more robust against +# non-deterministic cleanup and future changes than checking for exact number). +SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no + FROM mysql.gtid_slave_pos; +SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count)); SET GLOBAL debug_dbug= @old_dbug; ---source include/start_slave.inc --connection master -INSERT INTO t1 VALUES (3); +--disable_query_log +let $i = 20; +while ($i) { + eval INSERT INTO t1 VALUES ($i+40); + dec $i; +} +--enable_query_log --sync_slave_with_master --connection slave -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos - ORDER BY domain_id, sub_id DESC LIMIT 1; -SELECT * FROM t1 ORDER BY i; - +# Now check that 1) rows are being deleted again after removing error +# injection, and 2) old rows are left that failed their delete while errors +# were injected (again compensating for non-deterministic deletion). +# Deletion is async and slightly non-deterministic, so we wait for at +# least 10 of the 20 new rows to be deleted. 
+let $wait_condition= + SELECT COUNT(*) <= 20-10 + FROM mysql.gtid_slave_pos + WHERE seq_no > @pre_max_seq_no; +--source include/wait_condition.inc +SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*))) + FROM mysql.gtid_slave_pos + WHERE seq_no <= @pre_max_seq_no; # Clean up --connection master DROP TABLE t1; +--connection slave +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index e08472d5f51..0060cf4416c 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -21,6 +21,10 @@ SET GLOBAL slave_parallel_threads=10; CHANGE MASTER TO master_use_gtid=slave_pos; SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; SET GLOBAL slave_parallel_mode='optimistic'; +# Run the first part of the test with high batch size and see that +# old rows remain in the table. +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size; +SET GLOBAL gtid_cleanup_batch_size= 1000000; --connection server_1 @@ -108,7 +112,12 @@ SELECT * FROM t3 ORDER BY c; SELECT * FROM t1 ORDER BY a; SELECT * FROM t2 ORDER BY a; SELECT * FROM t3 ORDER BY c; -#SHOW STATUS LIKE 'Slave_retried_transactions'; +# Check that we have a bunch of old rows left-over - they were not deleted +# due to high @@gtid_cleanup_batch_size. Then set a low +# @@gtid_cleanup_batch_size so we can test that rows start being deleted. +SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*))) + FROM mysql.gtid_slave_pos; +SET GLOBAL gtid_cleanup_batch_size=1; --echo *** Test @@skip_parallel_replication. *** @@ -557,25 +566,18 @@ DROP TABLE t1, t2, t3; --connection server_2 --source include/sync_with_master_gtid.inc -# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147). 
-# -# There was a bug when a transaction got a conflict and was rolled back. It -# might have also handled deletion of some old rows, and these deletions would -# then also be rolled back. And since the deletes were never re-tried, old no -# longer needed rows would accumulate in the table without limit. -# -# The earlier part of this test file have plenty of transactions being rolled -# back. But the last DROP TABLE statement runs on its own and should never -# conflict, thus at this point the mysql.gtid_slave_pos table should be clean. -# -# To support @@gtid_pos_auto_engines, when a row is inserted in the table, it -# is associated with the engine of the table at insertion time, and it will -# only be deleted during record_gtid from a table of the same engine. Since we -# alter the table from MyISAM to InnoDB at the start of this test, we should -# end up with 4 rows: two left-over from when the table was MyISAM, and two -# left-over from the InnoDB part. ---echo Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos -select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id; +# Check that old rows are deleted from mysql.gtid_slave_pos. +# Deletion is asynchronous, so use wait_condition.inc. +# Also, there is a small amount of non-determinism in the deletion of old +# rows, so it is not guaranteed that there can never be more than +# @@gtid_cleanup_batch_size rows in the table; so allow a bit of slack +# here. 
+let $wait_condition= + SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size + FROM mysql.gtid_slave_pos; +--source include/wait_condition.inc +eval $wait_condition; +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size; --connection server_1 --source include/rpl_end.inc diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 56f7a136983..71761df1ab3 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -1202,6 +1202,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT NULL +VARIABLE_NAME GTID_CLEANUP_BATCH_SIZE +SESSION_VALUE NULL +GLOBAL_VALUE 64 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 64 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT Normally does not need tuning. How many old rows must accumulate in the mysql.gtid_slave_pos table before a background job will be run to delete them. Can be increased to reduce number of commits if using many different engines with --gtid_pos_auto_engines, or to reduce CPU overhead if using a huge number of different gtid_domain_ids. Can be decreased to reduce number of old rows in the table. 
+NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 2147483647 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME GTID_CURRENT_POS SESSION_VALUE NULL GLOBAL_VALUE diff --git a/sql/log_event.cc b/sql/log_event.cc index fc56656815d..3ea6be95dda 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -5601,7 +5601,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, gtid= rgi->current_gtid; if (unlikely(rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, - rgi, false, + true, false, &hton))) { int errcode= thd->get_stmt_da()->sql_errno(); @@ -8396,7 +8396,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi) { if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i], sub_id_list[i], - NULL, false, &hton))) + false, false, &hton))) return ret; rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i], hton, NULL); @@ -8933,7 +8933,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) rgi->gtid_pending= false; gtid= rgi->current_gtid; - err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, rgi, + err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, true, false, &hton); if (unlikely(err)) { diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 9ff47dc1ff1..0f116395fe0 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -568,6 +568,7 @@ ulong opt_binlog_commit_wait_count= 0; ulong opt_binlog_commit_wait_usec= 0; ulong opt_slave_parallel_max_queued= 131072; my_bool opt_gtid_ignore_duplicates= FALSE; +uint opt_gtid_cleanup_batch_size= 64; const double log_10[] = { 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, diff --git a/sql/mysqld.h b/sql/mysqld.h index 75f35a6df24..9c02b9127a2 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -262,6 +262,7 @@ extern ulong opt_slave_parallel_mode; extern ulong opt_binlog_commit_wait_count; extern ulong opt_binlog_commit_wait_usec; extern my_bool opt_gtid_ignore_duplicates; +extern uint 
opt_gtid_cleanup_batch_size; extern ulong back_log; extern ulong executed_events; extern char language[FN_REFLEN]; diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 322b84130f2..17f474c2acf 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -79,7 +79,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) rgi->gtid_pending= false; if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE) { - if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false, &hton)) + if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false, &hton)) DBUG_RETURN(1); update_state_hash(sub_id, &rgi->current_gtid, hton, rgi); } @@ -244,7 +244,7 @@ rpl_slave_state_free_element(void *arg) rpl_slave_state::rpl_slave_state() - : last_sub_id(0), gtid_pos_tables(0), loaded(false) + : pending_gtid_count(0), last_sub_id(0), gtid_pos_tables(0), loaded(false) { mysql_mutex_init(key_LOCK_slave_state, &LOCK_slave_state, MY_MUTEX_INIT_SLOW); @@ -331,14 +331,11 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, } } rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; - -#ifdef HAVE_REPLICATION - rgi->pending_gtid_deletes_clear(); -#endif } if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME)))) return 1; + list_elem->domain_id= domain_id; list_elem->server_id= server_id; list_elem->sub_id= sub_id; list_elem->seq_no= seq_no; @@ -348,6 +345,15 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, if (last_sub_id < sub_id) last_sub_id= sub_id; +#ifdef HAVE_REPLICATION + ++pending_gtid_count; + if (pending_gtid_count >= opt_gtid_cleanup_batch_size) + { + pending_gtid_count = 0; + slave_background_gtid_pending_delete_request(); + } +#endif + return 0; } @@ -382,20 +388,22 @@ rpl_slave_state::get_element(uint32 domain_id) int -rpl_slave_state::put_back_list(uint32 domain_id, list_element *list) +rpl_slave_state::put_back_list(list_element *list) { - element *e; + element 
*e= NULL; int err= 0; mysql_mutex_lock(&LOCK_slave_state); - if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0))) - { - err= 1; - goto end; - } while (list) { list_element *next= list->next; + + if ((!e || e->domain_id != list->domain_id) && + !(e= (element *)my_hash_search(&hash, (const uchar *)&list->domain_id, 0))) + { + err= 1; + goto end; + } e->add(list); list= next; } @@ -572,12 +580,12 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename) /* Write a gtid to the replication slave state table. + Do it as part of the transaction, to get slave crash safety, or as a separate + transaction if !in_transaction (eg. MyISAM or DDL). + gtid The global transaction id for this event group. sub_id Value allocated within the sub_id when the event group was read (sub_id must be consistent with commit order in master binlog). - rgi rpl_group_info context, if we are recording the gtid transactionally - as part of replicating a transactional event. NULL if called from - outside of a replicated transaction. 
Note that caller must later ensure that the new gtid and sub_id is inserted into the appropriate HASH element with rpl_slave_state.add(), so that it can @@ -585,16 +593,13 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename) */ int rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - rpl_group_info *rgi, bool in_statement, + bool in_transaction, bool in_statement, void **out_hton) { TABLE_LIST tlist; int err= 0, not_sql_thread; bool table_opened= false; TABLE *table; - list_element *delete_list= 0, *next, *cur, **next_ptr_ptr, **best_ptr_ptr; - uint64 best_sub_id; - element *elem; ulonglong thd_saved_option= thd->variables.option_bits; Query_tables_list lex_backup; wait_for_commit* suspended_wfc; @@ -684,7 +689,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, thd->wsrep_ignore_table= true; #endif - if (!rgi) + if (!in_transaction) { DBUG_PRINT("info", ("resetting OPTION_BEGIN")); thd->variables.option_bits&= @@ -716,113 +721,6 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, my_error(ER_OUT_OF_RESOURCES, MYF(0)); goto end; } - - mysql_mutex_lock(&LOCK_slave_state); - if ((elem= get_element(gtid->domain_id)) == NULL) - { - mysql_mutex_unlock(&LOCK_slave_state); - my_error(ER_OUT_OF_RESOURCES, MYF(0)); - err= 1; - goto end; - } - - /* Now pull out all GTIDs that were recorded in this engine. */ - delete_list = NULL; - next_ptr_ptr= &elem->list; - cur= elem->list; - best_sub_id= 0; - best_ptr_ptr= NULL; - while (cur) - { - list_element *next= cur->next; - if (cur->hton == hton) - { - /* Belongs to same engine, so move it to the delete list. */ - cur->next= delete_list; - delete_list= cur; - if (cur->sub_id > best_sub_id) - { - best_sub_id= cur->sub_id; - best_ptr_ptr= &delete_list; - } - else if (best_ptr_ptr == &delete_list) - best_ptr_ptr= &cur->next; - } - else - { - /* Another engine, leave it in the list. 
*/ - if (cur->sub_id > best_sub_id) - { - best_sub_id= cur->sub_id; - /* Current best is not on the delete list. */ - best_ptr_ptr= NULL; - } - *next_ptr_ptr= cur; - next_ptr_ptr= &cur->next; - } - cur= next; - } - *next_ptr_ptr= NULL; - /* - If the highest sub_id element is on the delete list, put it back on the - original list, to preserve the highest sub_id element in the table for - GTID position recovery. - */ - if (best_ptr_ptr) - { - cur= *best_ptr_ptr; - *best_ptr_ptr= cur->next; - cur->next= elem->list; - elem->list= cur; - } - mysql_mutex_unlock(&LOCK_slave_state); - - if (!delete_list) - goto end; - - /* Now delete any already committed GTIDs. */ - bitmap_set_bit(table->read_set, table->field[0]->field_index); - bitmap_set_bit(table->read_set, table->field[1]->field_index); - - if ((err= table->file->ha_index_init(0, 0))) - { - table->file->print_error(err, MYF(0)); - goto end; - } - cur = delete_list; - while (cur) - { - uchar key_buffer[4+8]; - - DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete", - { err= ENOENT; - table->file->print_error(err, MYF(0)); - /* `break' does not work inside DBUG_EXECUTE_IF */ - goto dbug_break; }); - - next= cur->next; - - table->field[1]->store(cur->sub_id, true); - /* domain_id is already set in table->record[0] from write_row() above. */ - key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false); - if (table->file->ha_index_read_map(table->record[1], key_buffer, - HA_WHOLE_KEY, HA_READ_KEY_EXACT)) - /* We cannot find the row, assume it is already deleted. */ - ; - else if ((err= table->file->ha_delete_row(table->record[1]))) - table->file->print_error(err, MYF(0)); - /* - In case of error, we still discard the element from the list. We do - not want to endlessly error on the same element in case of table - corruption or such. 
- */ - cur= next; - if (err) - break; - } -IF_DBUG(dbug_break:, ) - table->file->ha_index_end(); - end: #ifdef WITH_WSREP @@ -832,42 +730,13 @@ end: if (table_opened) { if (err || (err= ha_commit_trans(thd, FALSE))) - { - /* - If error, we need to put any remaining delete_list back into the HASH - so we can do another delete attempt later. - */ - if (delete_list) - { - put_back_list(gtid->domain_id, delete_list); - delete_list = 0; - } - ha_rollback_trans(thd, FALSE); - } + close_thread_tables(thd); - if (rgi) - { + if (in_transaction) thd->mdl_context.release_statement_locks(); - /* - Save the list of old gtid entries we deleted. If this transaction - fails later for some reason and is rolled back, the deletion of those - entries will be rolled back as well, and we will need to put them back - on the to-be-deleted list so we can re-do the deletion. Otherwise - redundant rows in mysql.gtid_slave_pos may accumulate if transactions - are rolled back and retried after record_gtid(). - */ -#ifdef HAVE_REPLICATION - rgi->pending_gtid_deletes_save(gtid->domain_id, delete_list); -#endif - } else - { thd->mdl_context.release_transactional_locks(); -#ifdef HAVE_REPLICATION - rpl_group_info::pending_gtid_deletes_free(delete_list); -#endif - } } thd->lex->restore_backup_query_tables_list(&lex_backup); thd->variables.option_bits= thd_saved_option; @@ -881,6 +750,254 @@ end: } +/* + Return a list of all old GTIDs in any mysql.gtid_slave_pos* table that are + no longer needed and can be deleted from the table. + + Within each domain, we need to keep around the latest GTID (the one with the + highest sub_id), but any others in that domain can be deleted. 
+*/ +rpl_slave_state::list_element * +rpl_slave_state::gtid_grab_pending_delete_list() +{ + uint32 i; + list_element *full_list; + + mysql_mutex_lock(&LOCK_slave_state); + full_list= NULL; + for (i= 0; i < hash.records; ++i) + { + element *elem= (element *)my_hash_element(&hash, i); + list_element *elist= elem->list; + list_element *last_elem, **best_ptr_ptr, *cur, *next; + uint64 best_sub_id; + + if (!elist) + continue; /* Nothing here */ + + /* Delete any old stuff, but keep around the most recent one. */ + cur= elist; + best_sub_id= cur->sub_id; + best_ptr_ptr= &elist; + last_elem= cur; + while ((next= cur->next)) { + last_elem= next; + if (next->sub_id > best_sub_id) + { + best_sub_id= next->sub_id; + best_ptr_ptr= &cur->next; + } + cur= next; + } + /* + Append the new elements to the full list. Note the order is important; + we do it here so that we do not break the list if best_sub_id is the + last of the new elements. + */ + last_elem->next= full_list; + /* + Delete the highest sub_id element from the old list, and put it back as + the single-element new list. + */ + cur= *best_ptr_ptr; + *best_ptr_ptr= cur->next; + cur->next= NULL; + elem->list= cur; + + /* + Collect the full list so far here. Note that elist may have moved if we + deleted the first element, so order is again important. + */ + full_list= elist; + } + mysql_mutex_unlock(&LOCK_slave_state); + + return full_list; +} + + +/* Find the mysql.gtid_slave_posXXX table associated with a given hton. */ +LEX_CSTRING * +rpl_slave_state::select_gtid_pos_table(void *hton) +{ + struct gtid_pos_table *table_entry; + + /* + See comments on rpl_slave_state::gtid_pos_tables for rules around proper + access to the list. 
+ */ + table_entry= (struct gtid_pos_table *) + my_atomic_loadptr_explicit(&gtid_pos_tables, MY_MEMORY_ORDER_ACQUIRE); + + while (table_entry) + { + if (table_entry->table_hton == hton) + { + if (likely(table_entry->state == GTID_POS_AVAILABLE)) + return &table_entry->table_name; + } + table_entry= table_entry->next; + } + + table_entry= (struct gtid_pos_table *) + my_atomic_loadptr_explicit(&default_gtid_pos_table, MY_MEMORY_ORDER_ACQUIRE); + return &table_entry->table_name; +} + + +void +rpl_slave_state::gtid_delete_pending(THD *thd, + rpl_slave_state::list_element **list_ptr) +{ + int err= 0; + ulonglong thd_saved_option; + + if (unlikely(!loaded)) + return; + +#ifdef WITH_WSREP + /* + Updates in slave state table should not be appended to galera transaction + writeset. + */ + thd->wsrep_ignore_table= true; +#endif + + thd_saved_option= thd->variables.option_bits; + thd->variables.option_bits&= + ~(ulonglong)(OPTION_NOT_AUTOCOMMIT |OPTION_BEGIN |OPTION_BIN_LOG | + OPTION_GTID_BEGIN); + + while (*list_ptr) + { + LEX_CSTRING *gtid_pos_table_name, *tmp_table_name; + Query_tables_list lex_backup; + TABLE_LIST tlist; + TABLE *table; + handler::Table_flags direct_pos; + list_element *cur, **cur_ptr_ptr; + bool table_opened= false; + void *hton= (*list_ptr)->hton; + + thd->reset_for_next_command(); + + /* + Only the SQL thread can call select_gtid_pos_table without a mutex. + Other threads need to use a mutex and take into account that the + result may change during execution, so we have to make a copy. + */ + mysql_mutex_lock(&LOCK_slave_state); + tmp_table_name= select_gtid_pos_table(hton); + gtid_pos_table_name= thd->make_clex_string(tmp_table_name->str, + tmp_table_name->length); + mysql_mutex_unlock(&LOCK_slave_state); + if (!gtid_pos_table_name) + { + /* Out of memory - we can try again later. 
*/ + break; + } + + thd->lex->reset_n_backup_query_tables_list(&lex_backup); + tlist.init_one_table(&MYSQL_SCHEMA_NAME, gtid_pos_table_name, NULL, TL_WRITE); + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0))) + goto end; + table_opened= true; + table= tlist.table; + + if ((err= gtid_check_rpl_slave_state_table(table))) + goto end; + + direct_pos= table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION; + bitmap_set_all(table->write_set); + table->rpl_write_set= table->write_set; + + /* Now delete any already committed GTIDs. */ + bitmap_set_bit(table->read_set, table->field[0]->field_index); + bitmap_set_bit(table->read_set, table->field[1]->field_index); + + if (!direct_pos && (err= table->file->ha_index_init(0, 0))) + { + table->file->print_error(err, MYF(0)); + goto end; + } + + cur = *list_ptr; + cur_ptr_ptr = list_ptr; + do + { + uchar key_buffer[4+8]; + list_element *next= cur->next; + + if (cur->hton == hton) + { + int res; + + table->field[0]->store((ulonglong)cur->domain_id, true); + table->field[1]->store(cur->sub_id, true); + if (direct_pos) + { + res= table->file->ha_rnd_pos_by_record(table->record[0]); + } + else + { + key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false); + res= table->file->ha_index_read_map(table->record[0], key_buffer, + HA_WHOLE_KEY, HA_READ_KEY_EXACT); + } + DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete", + { res= 1; + err= ENOENT; + sql_print_error(" Error deleting old GTID row"); + }); + if (res) + /* We cannot find the row, assume it is already deleted. */ + ; + else if ((err= table->file->ha_delete_row(table->record[0]))) + { + sql_print_error("Error deleting old GTID row: %s", + thd->get_stmt_da()->message()); + /* + In case of error, we still discard the element from the list. We do + not want to endlessly error on the same element in case of table + corruption or such. 
+ */ + } + *cur_ptr_ptr= next; + my_free(cur); + } + else + { + /* Leave this one in the list until we get to the table for its hton. */ + cur_ptr_ptr= &cur->next; + } + cur= next; + if (err) + break; + } while (cur); +end: + if (table_opened) + { + if (!direct_pos) + table->file->ha_index_end(); + + if (err || (err= ha_commit_trans(thd, FALSE))) + ha_rollback_trans(thd, FALSE); + } + close_thread_tables(thd); + thd->mdl_context.release_transactional_locks(); + thd->lex->restore_backup_query_tables_list(&lex_backup); + + if (err) + break; + } + thd->variables.option_bits= thd_saved_option; + +#ifdef WITH_WSREP + thd->wsrep_ignore_table= false; +#endif +} + + uint64 rpl_slave_state::next_sub_id(uint32 domain_id) { @@ -1251,7 +1368,7 @@ rpl_slave_state::load(THD *thd, const char *state_from_master, size_t len, if (gtid_parser_helper(&state_from_master, end, &gtid) || !(sub_id= next_sub_id(gtid.domain_id)) || - record_gtid(thd, &gtid, sub_id, NULL, in_statement, &hton) || + record_gtid(thd, &gtid, sub_id, false, in_statement, &hton) || update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, hton, NULL)) return 1; if (state_from_master == end) diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 0fc92d5e33c..60d822f7b0d 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -118,8 +118,9 @@ struct rpl_slave_state { struct list_element *next; uint64 sub_id; - uint64 seq_no; + uint32 domain_id; uint32 server_id; + uint64 seq_no; /* hton of mysql.gtid_slave_pos* table used to record this GTID. Can be NULL if the gtid table failed to load (eg. missing @@ -191,6 +192,8 @@ struct rpl_slave_state /* Mapping from domain_id to its element. */ HASH hash; + /* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */ + uint32 pending_gtid_count; /* Mutex protecting access to the state. */ mysql_mutex_t LOCK_slave_state; /* Auxiliary buffer to sort gtid list. 
*/ @@ -233,7 +236,10 @@ struct rpl_slave_state int truncate_state_table(THD *thd); void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename); int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - rpl_group_info *rgi, bool in_statement, void **out_hton); + bool in_transaction, bool in_statement, void **out_hton); + list_element *gtid_grab_pending_delete_list(); + LEX_CSTRING *select_gtid_pos_table(void *hton); + void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr); uint64 next_sub_id(uint32 domain_id); int iterate(int (*cb)(rpl_gtid *, void *), void *data, rpl_gtid *extra_gtids, uint32 num_extra, @@ -245,7 +251,7 @@ struct rpl_slave_state bool is_empty(); element *get_element(uint32 domain_id); - int put_back_list(uint32 domain_id, list_element *list); + int put_back_list(list_element *list); void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton, rpl_group_info *rgi); diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index b275ad884bd..2d91620c898 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1820,6 +1820,7 @@ rpl_load_gtid_slave_state(THD *thd) int err= 0; uint32 i; load_gtid_state_cb_data cb_data; + rpl_slave_state::list_element *old_gtids_list; DBUG_ENTER("rpl_load_gtid_slave_state"); mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); @@ -1905,6 +1906,13 @@ rpl_load_gtid_slave_state(THD *thd) rpl_global_gtid_slave_state->loaded= true; mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); + /* Clear out no longer needed elements now. 
*/ + old_gtids_list= + rpl_global_gtid_slave_state->gtid_grab_pending_delete_list(); + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &old_gtids_list); + if (old_gtids_list) + rpl_global_gtid_slave_state->put_back_list(old_gtids_list); + end: if (array_inited) delete_dynamic(&array); @@ -2086,7 +2094,6 @@ rpl_group_info::reinit(Relay_log_info *rli) long_find_row_note_printed= false; did_mark_start_commit= false; gtid_ev_flags2= 0; - pending_gtid_delete_list= NULL; last_master_timestamp = 0; gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL; speculation= SPECULATE_NO; @@ -2217,12 +2224,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error) erroneously update the GTID position. */ gtid_pending= false; - - /* - Rollback will have undone any deletions of old rows we might have made - in mysql.gtid_slave_pos. Put those rows back on the list to be deleted. - */ - pending_gtid_deletes_put_back(); } m_table_map.clear_tables(); slave_close_thread_tables(thd); @@ -2448,78 +2449,6 @@ rpl_group_info::unmark_start_commit() } -/* - When record_gtid() has deleted any old rows from the table - mysql.gtid_slave_pos as part of a replicated transaction, save the list of - rows deleted here. - - If later the transaction fails (eg. optimistic parallel replication), the - deletes will be undone when the transaction is rolled back. Then we can - put back the list of rows into the rpl_global_gtid_slave_state, so that - we can re-do the deletes and avoid accumulating old rows in the table. -*/ -void -rpl_group_info::pending_gtid_deletes_save(uint32 domain_id, - rpl_slave_state::list_element *list) -{ - /* - We should never get to a state where we try to save a new pending list of - gtid deletes while we still have an old one. But make sure we handle it - anyway just in case, so we avoid leaving stray entries in the - mysql.gtid_slave_pos table. 
- */ - DBUG_ASSERT(!pending_gtid_delete_list); - if (unlikely(pending_gtid_delete_list)) - pending_gtid_deletes_put_back(); - - pending_gtid_delete_list= list; - pending_gtid_delete_list_domain= domain_id; -} - - -/* - Take the list recorded by pending_gtid_deletes_save() and put it back into - rpl_global_gtid_slave_state. This is needed if deletion of the rows was - rolled back due to transaction failure. -*/ -void -rpl_group_info::pending_gtid_deletes_put_back() -{ - if (pending_gtid_delete_list) - { - rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain, - pending_gtid_delete_list); - pending_gtid_delete_list= NULL; - } -} - - -/* - Free the list recorded by pending_gtid_deletes_save(). Done when the deletes - in the list have been permanently committed. -*/ -void -rpl_group_info::pending_gtid_deletes_clear() -{ - pending_gtid_deletes_free(pending_gtid_delete_list); - pending_gtid_delete_list= NULL; -} - - -void -rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list) -{ - rpl_slave_state::list_element *next; - - while (list) - { - next= list->next; - my_free(list); - list= next; - } -} - - rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter) : rpl_filter(filter) { diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index d9f0e0e5d3b..b8b153c34be 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -757,11 +757,6 @@ struct rpl_group_info /* Needs room for "Gtid D-S-N\x00". */ char gtid_info_buf[5+10+1+10+1+20+1]; - /* List of not yet committed deletions in mysql.gtid_slave_pos. */ - rpl_slave_state::list_element *pending_gtid_delete_list; - /* Domain associated with pending_gtid_delete_list. */ - uint32 pending_gtid_delete_list_domain; - /* The timestamp, from the master, of the commit event. 
Used to do delayed update of rli->last_master_timestamp, for getting @@ -903,12 +898,6 @@ struct rpl_group_info char *gtid_info(); void unmark_start_commit(); - static void pending_gtid_deletes_free(rpl_slave_state::list_element *list); - void pending_gtid_deletes_save(uint32 domain_id, - rpl_slave_state::list_element *list); - void pending_gtid_deletes_put_back(); - void pending_gtid_deletes_clear(); - longlong get_row_stmt_start_timestamp() { return row_stmt_start_timestamp; diff --git a/sql/slave.cc b/sql/slave.cc index 2a8c977451d..03b730b2b00 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -465,6 +465,8 @@ static struct slave_background_gtid_pos_create_t { void *hton; } *slave_background_gtid_pos_create_list; +static volatile bool slave_background_gtid_pending_delete_flag; + pthread_handler_t handle_slave_background(void *arg __attribute__((unused))) @@ -499,6 +501,7 @@ handle_slave_background(void *arg __attribute__((unused))) { slave_background_kill_t *kill_list; slave_background_gtid_pos_create_t *create_list; + bool pending_deletes; thd->ENTER_COND(&COND_slave_background, &LOCK_slave_background, &stage_slave_background_wait_request, @@ -508,13 +511,15 @@ handle_slave_background(void *arg __attribute__((unused))) stop= abort_loop || thd->killed || slave_background_thread_stop; kill_list= slave_background_kill_list; create_list= slave_background_gtid_pos_create_list; - if (stop || kill_list || create_list) + pending_deletes= slave_background_gtid_pending_delete_flag; + if (stop || kill_list || create_list || pending_deletes) break; mysql_cond_wait(&COND_slave_background, &LOCK_slave_background); } slave_background_kill_list= NULL; slave_background_gtid_pos_create_list= NULL; + slave_background_gtid_pending_delete_flag= false; thd->EXIT_COND(&old_stage); while (kill_list) @@ -541,6 +546,17 @@ handle_slave_background(void *arg __attribute__((unused))) create_list= next; } + if (pending_deletes) + { + rpl_slave_state::list_element *list; + + 
slave_background_gtid_pending_delete_flag= false; + list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list(); + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list); + if (list) + rpl_global_gtid_slave_state->put_back_list(list); + } + mysql_mutex_lock(&LOCK_slave_background); } while (!stop); @@ -614,6 +630,23 @@ slave_background_gtid_pos_create_request( } +/* + Request the slave background thread to delete no longer used rows from the + mysql.gtid_slave_pos* tables. + + This is called from time-critical rpl_slave_state::update(), so we avoid + taking any locks here. This means we may race with the background thread + to occasionally lose a signal. This is not a problem; any pending rows to + be deleted will just be deleted a bit later as part of the next batch. +*/ +void +slave_background_gtid_pending_delete_request(void) +{ + slave_background_gtid_pending_delete_flag= true; + mysql_cond_signal(&COND_slave_background); +} + + /* Start the slave background thread. diff --git a/sql/slave.h b/sql/slave.h index 649d55b45b9..12d569b0333 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -276,6 +276,7 @@ bool net_request_file(NET* net, const char* fname); void slave_background_kill_request(THD *to_kill); void slave_background_gtid_pos_create_request (rpl_slave_state::gtid_pos_table *table_entry); +void slave_background_gtid_pending_delete_request(void); extern bool volatile abort_loop; extern Master_info *active_mi; /* active_mi for multi-master */ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 3a4871875e5..22d017fe345 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1942,6 +1942,19 @@ Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_CSTRING *base) } +static Sys_var_uint Sys_gtid_cleanup_batch_size( + "gtid_cleanup_batch_size", + "Normally does not need tuning. How many old rows must accumulate in " + "the mysql.gtid_slave_pos table before a background job will be run to " + "delete them. 
Can be increased to reduce number of commits if " + "using many different engines with --gtid_pos_auto_engines, or to " + "reduce CPU overhead if using a huge number of different " + "gtid_domain_ids. Can be decreased to reduce number of old rows in the " + "table.", + GLOBAL_VAR(opt_gtid_cleanup_batch_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,2147483647), DEFAULT(64), BLOCK_SIZE(1)); + + static bool check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var) { diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result index 9c20fea97ae..a1e501f78f4 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result @@ -2,6 +2,7 @@ include/master-slave.inc [connection master] connection server_2; include/stop_slave.inc +SET GLOBAL gtid_cleanup_batch_size = 999999999; CHANGE MASTER TO master_use_gtid=slave_pos; SET sql_log_bin=0; CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos; @@ -41,6 +42,8 @@ a 1 SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id; domain_id sub_id server_id seq_no +0 1 1 1 +0 2 1 2 0 3 1 3 0 4 1 4 SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb @@ -121,6 +124,21 @@ Transactions_multi_engine 6 DELETE FROM t1 WHERE a >= 100; DELETE FROM t2 WHERE a >= 100; DELETE FROM t3 WHERE a >= 100; +connection server_1; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos; +COUNT(*)>=10 +1 +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb +UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select; +COUNT(*)>=10 +1 +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb; +COUNT(*)>=10 +1 +SET GLOBAL gtid_cleanup_batch_size = 3; connection server_2; include/stop_slave.inc SET sql_log_bin=0; diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test 
b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test index e0d16e7f242..631d9ca533f 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test @@ -4,6 +4,12 @@ --connection server_2 --source include/stop_slave.inc + +# Set GTID cleanup limit high enough that cleanup will not run and we +# can rely on consistent table output in .result. +--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size` +SET GLOBAL gtid_cleanup_batch_size = 999999999; + CHANGE MASTER TO master_use_gtid=slave_pos; SET sql_log_bin=0; CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos; @@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100; DELETE FROM t3 WHERE a >= 100; +# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with. +--connection server_1 +--disable_query_log +let $i=10; +while ($i) { + eval INSERT INTO t1 VALUES (300+$i); + eval INSERT INTO t2 VALUES (300+$i); + eval INSERT INTO t3 VALUES (300+$i); + dec $i; +} +--enable_query_log +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc + +# Check that we have many rows in mysql.gtid_slave_pos now (since +# @@gtid_cleanup_batch_size was set to a huge value). No need to check +# for an exact number, since that will require changing .result if +# anything changes prior to this point, and we just need to know that +# we still have some data in the tables to make the following +# test effective. +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos; +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select; +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb; + +# Check that old GTID rows will be deleted when batch delete size is +# set reasonably. Old row deletion is not 100% deterministic (by design), so +# we must wait for it to occur, but it should occur eventually.
+SET GLOBAL gtid_cleanup_batch_size = 3; +let $i=40; +--disable_query_log +--let $keep_include_silent=1 +while ($i) { + let N=`SELECT 1+($i MOD 3)`; + --connection server_1 + eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N); + --source include/save_master_gtid.inc + --connection server_2 + --source include/sync_with_master_gtid.inc + let $j=50; + while ($j) { + let $is_done=`SELECT SUM(a)=1 FROM ( + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos + UNION ALL + SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select + UNION ALL + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_rocksdb) outer_select`; + if ($is_done) { + let $j=0; + } + if (!$is_done) { + real_sleep 0.1; + dec $j; + } + } + dec $i; + if ($is_done) { + let $i=0; + } +} +--enable_query_log +--let $keep_include_silent=0 +if (!$is_done) { + --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up +} + +--disable_query_log +DELETE FROM t1 WHERE a >= 100; +DELETE FROM t2 WHERE a >= 100; +DELETE FROM t3 WHERE a >= 100; +--enable_query_log + + # Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine. # Have mysql.gtid_slave_pos* for myisam and innodb but not rocksdb. 
--connection server_2 @@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine"; SET sql_log_bin=0; DROP TABLE mysql.gtid_slave_pos_innodb; SET sql_log_bin=1; +--disable_query_log +eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size; +--enable_query_log --connection server_1 DROP TABLE t1; diff --git a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result index d4532eec4e2..d79e7e59aa4 100644 --- a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result +++ b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result @@ -2,6 +2,7 @@ include/master-slave.inc [connection master] connection server_2; include/stop_slave.inc +SET GLOBAL gtid_cleanup_batch_size = 999999999; CHANGE MASTER TO master_use_gtid=slave_pos; SET sql_log_bin=0; CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos; @@ -41,6 +42,8 @@ a 1 SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id; domain_id sub_id server_id seq_no +0 1 1 1 +0 2 1 2 0 3 1 3 0 4 1 4 SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb @@ -121,6 +124,21 @@ Transactions_multi_engine 6 DELETE FROM t1 WHERE a >= 100; DELETE FROM t2 WHERE a >= 100; DELETE FROM t3 WHERE a >= 100; +connection server_1; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos; +COUNT(*)>=10 +1 +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb +UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select; +COUNT(*)>=10 +1 +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb; +COUNT(*)>=10 +1 +SET GLOBAL gtid_cleanup_batch_size = 3; connection server_2; include/stop_slave.inc SET sql_log_bin=0; diff --git a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test index ceb119cd0dc..1d19a25889e 100644 --- a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test +++ 
b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test @@ -4,6 +4,12 @@ --connection server_2 --source include/stop_slave.inc + +# Set GTID cleanup limit high enough that cleanup will not run and we +# can rely on consistent table output in .result. +--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size` +SET GLOBAL gtid_cleanup_batch_size = 999999999; + CHANGE MASTER TO master_use_gtid=slave_pos; SET sql_log_bin=0; CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos; @@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100; DELETE FROM t3 WHERE a >= 100; +# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with. +--connection server_1 +--disable_query_log +let $i=10; +while ($i) { + eval INSERT INTO t1 VALUES (300+$i); + eval INSERT INTO t2 VALUES (300+$i); + eval INSERT INTO t3 VALUES (300+$i); + dec $i; +} +--enable_query_log +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc + +# Check that we have many rows in mysql.gtid_slave_pos now (since +# @@gtid_cleanup_batch_size was set to a huge value). No need to check +# for an exact number, since that will require changing .result if +# anything changes prior to this point, and we just need to know that +# we still have some data in the tables to make the following +# test effective. +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos; +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select; +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb; + +# Check that old GTID rows will be deleted when batch delete size is +# set reasonably. Old row deletion is not 100% deterministic (by design), so +# we must wait for it to occur, but it should occur eventually.
+SET GLOBAL gtid_cleanup_batch_size = 3; +let $i=40; +--disable_query_log +--let $keep_include_silent=1 +while ($i) { + let N=`SELECT 1+($i MOD 3)`; + --connection server_1 + eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N); + --source include/save_master_gtid.inc + --connection server_2 + --source include/sync_with_master_gtid.inc + let $j=50; + while ($j) { + let $is_done=`SELECT SUM(a)=1 FROM ( + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos + UNION ALL + SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select + UNION ALL + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_tokudb) outer_select`; + if ($is_done) { + let $j=0; + } + if (!$is_done) { + real_sleep 0.1; + dec $j; + } + } + dec $i; + if ($is_done) { + let $i=0; + } +} +--enable_query_log +--let $keep_include_silent=0 +if (!$is_done) { + --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up +} + +--disable_query_log +DELETE FROM t1 WHERE a >= 100; +DELETE FROM t2 WHERE a >= 100; +DELETE FROM t3 WHERE a >= 100; +--enable_query_log + + # Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine. # Have mysql.gtid_slave_pos* for myisam and innodb but not tokudb. --connection server_2 @@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine"; SET sql_log_bin=0; DROP TABLE mysql.gtid_slave_pos_innodb; SET sql_log_bin=1; +--disable_query_log +eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size; +--enable_query_log --connection server_1 DROP TABLE t1;