From e821c9fa7cd822730882b28a2648e24539971e6c Mon Sep 17 00:00:00 2001 From: Daniele Sciascia Date: Tue, 29 Oct 2024 10:47:20 +0100 Subject: [PATCH] MDEV-35281 SR transaction crashes with innodb_snapshot_isolation Ignore snapshot isolation conflict during fragment removal, before streaming transaction commits. This happens when a streaming transaction creates a read view that precedes the INSERTion of fragments into the streaming_log table. Fragments are INSERTed using a different transaction. These fragments are then removed as part of COMMIT of the streaming transaction. This fragment removal operation could fail when the fragments were not part of the transaction's read view, thus violating snapshot isolation. --- .../suite/galera_sr/r/MDEV-35281.result | 9 +++++ mysql-test/suite/galera_sr/t/MDEV-35281.test | 37 +++++++++++++++++++ sql/sql_class.h | 8 ++-- sql/wsrep_schema.cc | 20 ++++++++++ storage/innobase/lock/lock0lock.cc | 4 +- 5 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 mysql-test/suite/galera_sr/r/MDEV-35281.result create mode 100644 mysql-test/suite/galera_sr/t/MDEV-35281.test diff --git a/mysql-test/suite/galera_sr/r/MDEV-35281.result b/mysql-test/suite/galera_sr/r/MDEV-35281.result new file mode 100644 index 00000000000..eeac965db19 --- /dev/null +++ b/mysql-test/suite/galera_sr/r/MDEV-35281.result @@ -0,0 +1,9 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); +SET SESSION wsrep_trx_fragment_size=1; +SET SESSION innodb_snapshot_isolation=ON; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +INSERT INTO t1 VALUES (1); +COMMIT; +DROP TABLE t1; diff --git a/mysql-test/suite/galera_sr/t/MDEV-35281.test b/mysql-test/suite/galera_sr/t/MDEV-35281.test new file mode 100644 index 00000000000..1ed2feab7f8 --- /dev/null +++ b/mysql-test/suite/galera_sr/t/MDEV-35281.test @@ -0,0 +1,37 @@ +# +# MDEV-35281 - SR transaction crashes with innodb_snapshot_isolation +# +# Test outline: a simple SR transaction fails to
remove +# its fragments from streaming_log table, with error +# HA_ERR_RECORD_CHANGED. +# This happens with the following sequence of events: +# 1. Start a streaming replication transaction +# 2. The transaction creates a read view in InnoDB +# (this must happen before a fragment is replicated) +# 3. The transaction replicates a fragment. +# Internally, a new transaction is created to INSERT +# a row representing the fragment into the streaming_log +# table and is committed immediately. +# 4. The streaming replication transaction COMMITs. +# Before committing, the transaction replicates +# a commit fragment and DELETEs its fragments that +# were created in the streaming_log table. +# If bug is present, fragment removal from the +# streaming_log table violates snapshot isolation, +# thus the operation fails with HA_ERR_RECORD_CHANGED. +# (One or more records from the streaming_log table +# are removed, while these were not visible to +# the transaction). + +--source include/galera_cluster.inc + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); + +SET SESSION wsrep_trx_fragment_size=1; +SET SESSION innodb_snapshot_isolation=ON; + +START TRANSACTION WITH CONSISTENT SNAPSHOT; +INSERT INTO t1 VALUES (1); +COMMIT; + +DROP TABLE t1; diff --git a/sql/sql_class.h b/sql/sql_class.h index da04c80e9e9..319556bdd02 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -5561,9 +5561,11 @@ public: query_id_t wsrep_last_query_id; XID wsrep_xid; - /** This flag denotes that record locking should be skipped during INSERT - and gap locking during SELECT. Only used by the streaming replication thread - that only modifies the wsrep_schema.SR table. */ + /** This flag denotes that record locking should be skipped during INSERT, + gap locking during SELECT, and write-write conflicts due to innodb + snapshot isolation during DELETE. + Only used by the streaming replication thread that only modifies the + mysql.wsrep_streaming_log table. 
*/ my_bool wsrep_skip_locking; mysql_cond_t COND_wsrep_thd; diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc index 6afe1771667..aa71a8eb5b7 100644 --- a/sql/wsrep_schema.cc +++ b/sql/wsrep_schema.cc @@ -183,6 +183,25 @@ private: my_bool m_wsrep_ignore_table; }; +class wsrep_skip_locking +{ +public: + wsrep_skip_locking(THD *thd) + : m_thd(thd) + , m_wsrep_skip_locking(thd->wsrep_skip_locking) + { + thd->wsrep_skip_locking= true; + } + ~wsrep_skip_locking() + { + m_thd->wsrep_skip_locking= m_wsrep_skip_locking; + } + +private: + THD *m_thd; + my_bool m_wsrep_skip_locking; +}; + class thd_server_status { public: @@ -1235,6 +1254,7 @@ int Wsrep_schema::remove_fragments(THD* thd, Wsrep_schema_impl::wsrep_ignore_table wsrep_ignore_table(thd); Wsrep_schema_impl::binlog_off binlog_off(thd); Wsrep_schema_impl::sql_safe_updates sql_safe_updates(thd); + Wsrep_schema_impl::wsrep_skip_locking skip_locking(thd); Query_tables_list query_tables_list_backup; Open_tables_backup open_tables_backup; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index a140ff0db47..2392d67bb04 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -6375,7 +6375,9 @@ lock_clust_rec_read_check_and_lock( && trx->snapshot_isolation && trx->read_view.is_open() && !trx->read_view.changes_visible( - trx_read_trx_id(rec + row_trx_id_offset(rec, index)))) { + trx_read_trx_id(rec + row_trx_id_offset(rec, index))) + && IF_WSREP(!(trx->is_wsrep() + && wsrep_thd_skip_locking(trx->mysql_thd)), true)) { return DB_RECORD_CHANGED; }