diff --git a/mysql-test/suite/rpl/r/rpl_xa_prepare_gtid_fail.result b/mysql-test/suite/rpl/r/rpl_xa_prepare_gtid_fail.result new file mode 100644 index 00000000000..f3fecbda349 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_xa_prepare_gtid_fail.result @@ -0,0 +1,51 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +change master to master_use_gtid=slave_pos; +set @@global.slave_parallel_threads= 4; +set @@global.slave_parallel_mode= optimistic; +set @@global.gtid_strict_mode=ON; +set sql_log_bin= 0; +alter table mysql.gtid_slave_pos engine=innodb; +call mtr.add_suppression("Deadlock found.*"); +set sql_log_bin= 1; +include/start_slave.inc +connection master; +create table t1 (a int primary key, b int) engine=innodb; +insert t1 values (1,1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.innodb_lock_wait_timeout= 1; +connection master; +set @@session.gtid_seq_no=100; +xa start '1'; +update t1 set b=b+10 where a=1; +xa end '1'; +xa prepare '1'; +xa commit '1'; +include/save_master_gtid.inc +connection slave; +connection slave1; +BEGIN; +SELECT * FROM mysql.gtid_slave_pos WHERE seq_no=100 FOR UPDATE; +domain_id sub_id server_id seq_no +connection slave; +include/start_slave.inc +include/wait_for_slave_sql_error.inc [errno=1942,1213] +connection slave1; +ROLLBACK; +# Cleanup +connection master; +drop table t1; +connection slave; +include/stop_slave.inc +set @@global.gtid_slave_pos= "0-1-100"; +set @@global.slave_parallel_threads= 0; +set @@global.gtid_strict_mode= 0; +set @@global.innodb_lock_wait_timeout= 50; +include/start_slave.inc +include/rpl_end.inc +# End of rpl_xa_prepare_gtid_fail.test diff --git a/mysql-test/suite/rpl/t/rpl_xa_prepare_gtid_fail.test b/mysql-test/suite/rpl/t/rpl_xa_prepare_gtid_fail.test new file mode 100644 index 00000000000..8042b355754 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_xa_prepare_gtid_fail.test @@ -0,0 +1,106 @@ +# +# When handling the replication of an XA PREPARE, the commit phase is +# bifurcated. First, the prepare is handled by the relevant storage engines. +# Then second,the GTID slave state is updated as a separate autocommit +# transaction. If the second stage fails, i.e. we are unable to update the +# GTID slave state, then the slave should immediately quit in error, without +# retry. +# +# This tests validates the above behavior by simulating a deadlock on the +# GTID slave state table during the second part of XA PREPARE's commit, to +# ensure that the appropriate error is reported and the transaction was never +# retried. +# +# +# References +# MDEV-31038: Parallel Replication Breaks if XA PREPARE Fails Updating Slave +# GTID State +# +source include/master-slave.inc; +source include/have_binlog_format_row.inc; +source include/have_innodb.inc; + +--connection slave +--source include/stop_slave.inc + +--let $save_par_thds= `SELECT @@global.slave_parallel_threads` +--let $save_strict_mode= `SELECT @@global.gtid_strict_mode` +--let $save_innodb_lock_wait_timeout= `SELECT @@global.innodb_lock_wait_timeout` + +change master to master_use_gtid=slave_pos; +set @@global.slave_parallel_threads= 4; +set @@global.slave_parallel_mode= optimistic; +set @@global.gtid_strict_mode=ON; + +set sql_log_bin= 0; +alter table mysql.gtid_slave_pos engine=innodb; +call mtr.add_suppression("Deadlock found.*"); +set sql_log_bin= 1; +--source include/start_slave.inc + +--connection master +let $datadir= `select @@datadir`; +create table t1 (a int primary key, b int) engine=innodb; +insert t1 values (1,1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +set @@global.innodb_lock_wait_timeout= 1; + +--let $retried_tx_initial= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1) + +--connection master +--let $gtid_domain_id=`SELECT @@GLOBAL.gtid_domain_id` +--let $gtid_server_id=`SELECT @@GLOBAL.server_id` +--let $xap_seq_no=100 +--eval set @@session.gtid_seq_no=$xap_seq_no +xa start '1'; +update t1 set b=b+10 where a=1; +xa end '1'; +xa prepare '1'; +--let $new_gtid= `SELECT @@global.gtid_binlog_pos` +xa commit '1'; +--source include/save_master_gtid.inc + + +--connection slave + +#--eval set statement sql_log_bin=0 for insert into mysql.gtid_slave_pos values ($gtid_domain_id, 5, $gtid_server_id, $xap_seq_no) + +--connection slave1 +BEGIN; +--eval SELECT * FROM mysql.gtid_slave_pos WHERE seq_no=$xap_seq_no FOR UPDATE + +--connection slave +--source include/start_slave.inc + +--let $slave_sql_errno= 1942,1213 +--source include/wait_for_slave_sql_error.inc + +--let $retried_tx_test= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1) +if ($retried_tx_initial != $retried_tx_test) +{ + --echo Transaction was retried when a failed XA PREPARE slave GTID update should lead to immediate slave stop without retry + --die Transaction was retried when a failed XA PREPARE slave GTID update should lead to immediate slave stop without retry +} + +--connection slave1 +ROLLBACK; + +--echo # Cleanup + +--connection master +drop table t1; + +--connection slave +--source include/stop_slave.inc +--eval set @@global.gtid_slave_pos= "$new_gtid" +--eval set @@global.slave_parallel_threads= $save_par_thds +--eval set @@global.gtid_strict_mode= $save_strict_mode +--eval set @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout +--source include/start_slave.inc + +--source include/rpl_end.inc +--echo # End of rpl_xa_prepare_gtid_fail.test diff --git a/sql/handler.cc b/sql/handler.cc index 926ef2f4f54..eaaf4664c07 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2057,8 +2057,11 @@ int ha_rollback_trans(THD *thd, bool all) rollback without signalling following transactions. And in release builds, we explicitly do the signalling before rolling back. */ - DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) || - thd->transaction->xid_state.is_explicit_XA()); + DBUG_ASSERT( + !(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) || + (thd->transaction->xid_state.is_explicit_XA() || + (thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_PREPARED_XA))); + if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) thd->rgi_slave->unmark_start_commit(); } diff --git a/sql/log_event.h b/sql/log_event.h index 5dcf0315f68..2fcc4549bc9 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -3042,7 +3042,7 @@ private: virtual int do_commit()= 0; virtual int do_apply_event(rpl_group_info *rgi); int do_record_gtid(THD *thd, rpl_group_info *rgi, bool in_trans, - void **out_hton); + void **out_hton, bool force_err= false); enum_skip_reason do_shall_skip(rpl_group_info *rgi); virtual const char* get_query()= 0; #endif diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index c5fb637a000..a2b78bc241d 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -152,6 +152,30 @@ is_parallel_retry_error(rpl_group_info *rgi, int err) return has_temporary_error(rgi->thd); } +/** + Accumulate a Diagnostics_area's errors and warnings into an output buffer + + @param errbuf The output buffer to write error messages + @param errbuf_size The size of the output buffer + @param da The Diagnostics_area to check for errors +*/ +static void inline aggregate_da_errors(char *errbuf, size_t errbuf_size, + Diagnostics_area *da) +{ + const char *errbuf_end= errbuf + errbuf_size; + char *slider; + Diagnostics_area::Sql_condition_iterator it= da->sql_conditions(); + const Sql_condition *err; + size_t len; + for (err= it++, slider= errbuf; err && slider < errbuf_end - 1; + slider += len, err= it++) + { + len= my_snprintf(slider, errbuf_end - slider, + " %s, Error_code: %d;", err->get_message_text(), + err->get_sql_errno()); + } +} + /** Error reporting facility for Rows_log_event::do_apply_event @@ -172,13 +196,8 @@ static void inline slave_rows_error_report(enum loglevel level, int ha_error, const char *log_name, my_off_t pos) { const char *handler_error= (ha_error ? HA_ERR(ha_error) : NULL); - char buff[MAX_SLAVE_ERRMSG], *slider; - const char *buff_end= buff + sizeof(buff); - size_t len; - Diagnostics_area::Sql_condition_iterator it= - thd->get_stmt_da()->sql_conditions(); + char buff[MAX_SLAVE_ERRMSG]; Relay_log_info const *rli= rgi->rli; - const Sql_condition *err; buff[0]= 0; int errcode= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0; @@ -191,13 +210,7 @@ static void inline slave_rows_error_report(enum loglevel level, int ha_error, if (is_parallel_retry_error(rgi, errcode)) return; - for (err= it++, slider= buff; err && slider < buff_end - 1; - slider += len, err= it++) - { - len= my_snprintf(slider, buff_end - slider, - " %s, Error_code: %d;", err->get_message_text(), - err->get_sql_errno()); - } + aggregate_da_errors(buff, sizeof(buff), thd->get_stmt_da()); if (ha_error != 0) rli->report(level, errcode, rgi->gtid_info(), @@ -3893,7 +3906,8 @@ bool slave_execute_deferred_events(THD *thd) #if defined(HAVE_REPLICATION) int Xid_apply_log_event::do_record_gtid(THD *thd, rpl_group_info *rgi, - bool in_trans, void **out_hton) + bool in_trans, void **out_hton, + bool force_err) { int err= 0; Relay_log_info const *rli= rgi->rli; @@ -3908,14 +3922,26 @@ int Xid_apply_log_event::do_record_gtid(THD *thd, rpl_group_info *rgi, int ec= thd->get_stmt_da()->sql_errno(); /* Do not report an error if this is really a kill due to a deadlock. - In this case, the transaction will be re-tried instead. + In this case, the transaction will be re-tried instead. Unless force_err + is set, as in the case of XA PREPARE, as the GTID state is updated as a + separate transaction, and if that fails, we should not retry but exit in + error immediately. */ - if (!is_parallel_retry_error(rgi, ec)) + if (!is_parallel_retry_error(rgi, ec) || force_err) + { + char buff[MAX_SLAVE_ERRMSG]; + buff[0]= 0; + aggregate_da_errors(buff, sizeof(buff), thd->get_stmt_da()); + + if (force_err) + thd->clear_error(); + rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, rgi->gtid_info(), "Error during XID COMMIT: failed to update GTID state in " - "%s.%s: %d: %s", + "%s.%s: %d: %s the event's master log %s, end_log_pos %llu", "mysql", rpl_gtid_slave_state_table_name.str, ec, - thd->get_stmt_da()->message()); + buff, RPL_LOG_NAME, log_pos); + } thd->is_slave_error= 1; } @@ -3989,7 +4015,7 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi) { DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA()); - if ((err= do_record_gtid(thd, rgi, false, &hton))) + if ((err= do_record_gtid(thd, rgi, false, &hton, true))) return err; }