From 451bd32600ea5e1fd4b9320ad20104372c050ccb Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Wed, 17 Jun 2020 14:18:02 +0200 Subject: [PATCH 01/13] MDEV-22922: galera_ftwrl_drain test failed A new state has been added to the wait condition, which eliminates the error during the test. --- mysql-test/suite/galera/t/galera_ftwrl_drain.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/suite/galera/t/galera_ftwrl_drain.test b/mysql-test/suite/galera/t/galera_ftwrl_drain.test index ee64e147f65..c8cdda5d624 100644 --- a/mysql-test/suite/galera/t/galera_ftwrl_drain.test +++ b/mysql-test/suite/galera/t/galera_ftwrl_drain.test @@ -39,7 +39,7 @@ SELECT COUNT(*) = 0 FROM t1; --connection node_2 --sleep 1 ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE = 'Init' AND INFO = 'FLUSH TABLES WITH READ LOCK' +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE (STATE = 'Init' OR STATE = 'starting') AND INFO = 'FLUSH TABLES WITH READ LOCK' --source include/wait_condition.inc --source include/galera_clear_sync_point.inc From 572e53d8ccf68ec04722d9868ed50db0276d6d54 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Fri, 19 Jun 2020 15:24:16 +0530 Subject: [PATCH 02/13] MDEV-22931 mtr_t::mtr_t() allocates some memory mtr_t::m_freed_pages: Renamed from m_freed_ranges and made it as pointer indirection. mtr_t::add_freed_offset(): Allocates m_freed_pages. mtr_t:clear_freed_ranges(): Removed. mtr_t::init(): Added debug assertion to check whether m_freed_pages is not yet initialized. btr_page_alloc_low(): Remove #ifdef UNIV_DEBUG_SCRUBBING. mtr_t::commit(): Delete m_freed_pages, reset m_trim_pages and m_freed_in_system_tablespace. fil_space_t::clear_freed_ranges(): Added a comment to explain how undo log tablespaces uses it. --- storage/innobase/btr/btr0btr.cc | 16 ---------------- storage/innobase/include/fil0fil.h | 3 ++- storage/innobase/include/mtr0log.h | 1 + storage/innobase/include/mtr0mtr.h | 13 ++++--------- storage/innobase/mtr/mtr0mtr.cc | 30 ++++++++++++++---------------- 5 files changed, 21 insertions(+), 42 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 11104f078ff..43a3b44903f 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -537,22 +537,6 @@ btr_page_alloc_low( seg_header, hint_page_no, file_direction, true, mtr, init_mtr); -#ifdef UNIV_DEBUG_SCRUBBING - if (block != NULL) { - fprintf(stderr, - "alloc %lu:%lu to index: %lu root: %lu\n", - buf_block_get_page_no(block), - buf_block_get_space(block), - index->id, - dict_index_get_page(index)); - } else { - fprintf(stderr, - "failed alloc index: %lu root: %lu\n", - index->id, - dict_index_get_page(index)); - } -#endif /* UNIV_DEBUG_SCRUBBING */ - return block; } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 7d0402574d6..6bc150184ef 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -502,7 +502,8 @@ struct fil_space_t last_freed_lsn= lsn; } - /** Clear all freed ranges */ + /** Clear all freed ranges for undo tablespace when InnoDB + encounters TRIM redo log record */ void clear_freed_ranges() { std::lock_guard freed_lock(freed_range_mutex); diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index cf5f7c751ee..926411a6f97 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -511,6 +511,7 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, @param[in,out] b buffer page */ inline void mtr_t::init(buf_block_t *b) { + ut_ad(!m_freed_pages); b->page.status= buf_page_t::INIT_ON_FLUSH; if (m_log_mode != MTR_LOG_ALL) diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index cd1b9bef4aa..6058d0daaf6 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -580,16 +580,11 @@ public: void add_freed_offset(page_id_t id) { ut_ad(m_user_space == NULL || id.space() == m_user_space->id); - m_freed_ranges.add_value(id.page_no()); + if (!m_freed_pages) + m_freed_pages= new range_set(); + m_freed_pages->add_value(id.page_no()); } - /** Clear the freed pages */ - void clear_freed_ranges() - { - m_freed_ranges.clear(); - m_freed_in_system_tablespace= 0; - m_trim_pages= false; - } private: /** Log a write of a byte string to a page. @param block buffer page @@ -685,7 +680,7 @@ private: lsn_t m_commit_lsn; /** set of freed page ids */ - range_set m_freed_ranges; + range_set *m_freed_pages= nullptr; }; #include "mtr0mtr.ic" diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 0997a735699..2a2f61641fe 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -354,12 +354,8 @@ struct mtr_write_log_t { /** Start a mini-transaction. */ void mtr_t::start() { -#ifdef HAVE_valgrind_or_MSAN - char m_freed_ranges_vbits[sizeof m_freed_ranges]; -#endif - MEM_GET_VBITS(&m_freed_ranges, m_freed_ranges_vbits, sizeof m_freed_ranges); UNIV_MEM_INVALID(this, sizeof *this); - MEM_SET_VBITS(&m_freed_ranges, m_freed_ranges_vbits, sizeof m_freed_ranges); + UNIV_MEM_VALID(&m_freed_pages, sizeof(m_freed_pages)); ut_d(m_start= true); ut_d(m_commit= false); @@ -378,6 +374,7 @@ void mtr_t::start() m_user_space= nullptr; m_commit_lsn= 0; m_freed_in_system_tablespace= m_trim_pages= false; + ut_ad(!m_freed_pages); } /** Release the resources */ @@ -387,7 +384,6 @@ inline void mtr_t::release_resources() ut_d(m_memo.for_each_block_in_reverse(CIterate())); m_log.erase(); m_memo.erase(); - clear_freed_ranges(); ut_d(m_commit= true); } @@ -420,8 +416,9 @@ void mtr_t::commit() to insert into the flush list. */ log_mutex_exit(); - if (!m_freed_ranges.empty()) + if (m_freed_pages) { + ut_ad(!m_freed_pages->empty()); fil_space_t *freed_space= m_user_space; /* Get the freed tablespace in case of predefined tablespace */ if (!freed_space) @@ -434,14 +431,15 @@ void mtr_t::commit() /* Update the last freed lsn */ freed_space->update_last_freed_lsn(m_commit_lsn); - for (const auto &range : m_freed_ranges) - freed_space->add_free_range(range); - } - - if (is_trim_pages()) - { - ut_ad(m_user_space != nullptr); - m_user_space->clear_freed_ranges(); + if (!is_trim_pages()) + for (const auto &range : *m_freed_pages) + freed_space->add_free_range(range); + else + freed_space->clear_freed_ranges(); + delete m_freed_pages; + m_freed_pages= nullptr; + /* Reset of m_trim_pages and m_freed_in_system_tablespace + happens in mtr_t::start() */ } m_memo.for_each_block_in_reverse(CIterate @@ -472,7 +470,7 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) ut_ad(!m_made_dirty); ut_ad(m_memo.size() == 0); ut_ad(!srv_read_only_mode); - ut_ad(m_freed_ranges.empty()); + ut_ad(!m_freed_pages); ut_ad(!m_freed_in_system_tablespace); if (checkpoint_lsn) { From 9160e4aa95cd3c40ea0733d632692526049ed12b Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Fri, 19 Jun 2020 22:04:02 +0300 Subject: [PATCH 03/13] Post-fix for 0a9633ee: Basic LEX::print function that supports UPDATEs Backport this fix: CLX-105: UPDATE query causes crash LEX::print() should not crash when the UPDATE has no WHERE clause Fixes CLX-373. --- sql/sql_lex.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 671948f8e8d..f39f88fe843 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -3565,8 +3565,11 @@ void LEX::print(String *str, enum_query_type query_type) value->print(str, query_type); } - str->append(STRING_WITH_LEN(" WHERE ")); - sel->where->print(str, query_type); + if (sel->where) + { + str->append(STRING_WITH_LEN(" WHERE ")); + sel->where->print(str, query_type); + } if (sel->order_list.elements) { From 545a6194e827ace9d2d560d89dcde735676abbdb Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 22 Jun 2020 19:48:46 +0530 Subject: [PATCH 04/13] MDEV-22187: SIGSEGV in ha_innobase::cmp_ref on DELETE Added a new test file for tests with delete using INNODB. --- mysql-test/main/delete_innodb.result | 26 ++++++++++++++++++++++++++ mysql-test/main/delete_innodb.test | 22 ++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 mysql-test/main/delete_innodb.result create mode 100644 mysql-test/main/delete_innodb.test diff --git a/mysql-test/main/delete_innodb.result b/mysql-test/main/delete_innodb.result new file mode 100644 index 00000000000..e82d162cce8 --- /dev/null +++ b/mysql-test/main/delete_innodb.result @@ -0,0 +1,26 @@ +# Tests for delete with INNODB +# +# MDEV-22187: SIGSEGV in ha_innobase::cmp_ref on DELETE +# +SET @save_sort_buffer_size= @@sort_buffer_size; +SET sort_buffer_size=1024; +CREATE TABLE t1(c1 CHAR(255) PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES (0), ('a'), ('b'); +ANALYZE TABLE T1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT * FROM t1; +c1 +0 +a +b +EXPLAIN DELETE b FROM t1 AS a JOIN t1 AS b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE a index NULL PRIMARY 255 NULL 3 Using index +1 SIMPLE b ALL NULL NULL NULL NULL 3 +DELETE b FROM t1 AS a JOIN t1 AS b; +SELECT * FROM t1; +c1 +SET sort_buffer_size=@save_sort_buffer_size; +DROP TABLE t1; diff --git a/mysql-test/main/delete_innodb.test b/mysql-test/main/delete_innodb.test new file mode 100644 index 00000000000..17a4e4d98f3 --- /dev/null +++ b/mysql-test/main/delete_innodb.test @@ -0,0 +1,22 @@ +--source include/have_innodb.inc +--source include/have_sequence.inc + +--echo # Tests for delete with INNODB + +--echo # +--echo # MDEV-22187: SIGSEGV in ha_innobase::cmp_ref on DELETE +--echo # + +SET @save_sort_buffer_size= @@sort_buffer_size; +SET sort_buffer_size=1024; +CREATE TABLE t1(c1 CHAR(255) PRIMARY KEY) ENGINE=InnoDB; + +INSERT INTO t1 VALUES (0), ('a'), ('b'); +ANALYZE TABLE T1 PERSISTENT FOR ALL; +SELECT * FROM t1; +EXPLAIN DELETE b FROM t1 AS a JOIN t1 AS b; +DELETE b FROM t1 AS a JOIN t1 AS b; +SELECT * FROM t1; + +SET sort_buffer_size=@save_sort_buffer_size; +DROP TABLE t1; From 127ed74fd2c3398e2ebc6602291250a5db4b2ec4 Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Fri, 5 Jun 2020 16:41:25 +0300 Subject: [PATCH 05/13] MDEV-22420 DDL on temporary object is prohibited when XA is in prepare state The parser must reject DDL operations on temporary objects when they may modify or alter such object, including temporary tables and sequences. The rejection is regardless (has been already in place for bin-loggable DML:s) of the binlogging capability of the server or connection. The patch implements the requirement. A binlog test is added. --- .../binlog/r/binlog_empty_xa_prepared.result | 31 ++++++++++++++++ .../binlog/t/binlog_empty_xa_prepared.test | 35 +++++++++++++++++++ sql/sql_parse.cc | 2 ++ 3 files changed, 68 insertions(+) create mode 100644 mysql-test/suite/binlog/r/binlog_empty_xa_prepared.result create mode 100644 mysql-test/suite/binlog/t/binlog_empty_xa_prepared.test diff --git a/mysql-test/suite/binlog/r/binlog_empty_xa_prepared.result b/mysql-test/suite/binlog/r/binlog_empty_xa_prepared.result new file mode 100644 index 00000000000..eb96f7bb984 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_empty_xa_prepared.result @@ -0,0 +1,31 @@ +CREATE TEMPORARY SEQUENCE seq_1; +XA START '3'; +CREATE TEMPORARY TABLE tmp_1(c INT); +XA END '3'; +XA PREPARE '3'; +DROP TEMPORARY TABLE tmp_1; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +ALTER TABLE tmp_1 DROP COLUMN c; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +DROP TEMPORARY SEQUENCE seq_1; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +ALTER SEQUENCE seq_1 INCREMENT BY 1; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +CREATE TEMPORARY TABLE tmp_2(c INT); +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +CREATE TEMPORARY SEQUENCE seq_2; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +XA ROLLBACK '3'; +# Proof of correct logging incl empty XA-PREPARE +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TEMPORARY SEQUENCE seq_1 +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TEMPORARY TABLE tmp_1(c INT) +master-bin.000001 # Query # # COMMIT +master-bin.000001 # Gtid # # XA START X'33',X'',1 GTID #-#-# +master-bin.000001 # Query # # XA END X'33',X'',1 +master-bin.000001 # XA_prepare # # XA PREPARE X'33',X'',1 +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # XA ROLLBACK X'33',X'',1 diff --git a/mysql-test/suite/binlog/t/binlog_empty_xa_prepared.test b/mysql-test/suite/binlog/t/binlog_empty_xa_prepared.test new file mode 100644 index 00000000000..7f923f71f81 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_empty_xa_prepared.test @@ -0,0 +1,35 @@ +# The test verifies execution and binary logging of user XA that produce empty +# XA-PREPARE group of events. + +--source include/have_binlog_format_mixed.inc +--source include/have_innodb.inc + + +# MDEV-22420 DDL on temporary object is prohibited when XA is in prepare state + +# Temporary sequnce may not be created within a transaction +CREATE TEMPORARY SEQUENCE seq_1; + +XA START '3'; +CREATE TEMPORARY TABLE tmp_1(c INT); +XA END '3'; +XA PREPARE '3'; +--error ER_XAER_RMFAIL +DROP TEMPORARY TABLE tmp_1; +--error ER_XAER_RMFAIL +ALTER TABLE tmp_1 DROP COLUMN c; +--error ER_XAER_RMFAIL +DROP TEMPORARY SEQUENCE seq_1; +--error ER_XAER_RMFAIL +ALTER SEQUENCE seq_1 INCREMENT BY 1; + +--error ER_XAER_RMFAIL +CREATE TEMPORARY TABLE tmp_2(c INT); +--error ER_XAER_RMFAIL +CREATE TEMPORARY SEQUENCE seq_2; + +# Cleanup +XA ROLLBACK '3'; + +--echo # Proof of correct logging incl empty XA-PREPARE +--source include/show_binlog_events.inc diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index e35ee16cf4b..cd60472e81c 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -4890,6 +4890,8 @@ mysql_execute_command(THD *thd) } else { + if (thd->transaction->xid_state.check_has_uncommitted_xa()) + goto error; status_var_decrement(thd->status_var.com_stat[lex->sql_command]); status_var_increment(thd->status_var.com_drop_tmp_table); From ead98fe5d34912578445d42e620c8ed95df4c433 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 22 Jun 2020 23:40:56 +0530 Subject: [PATCH 06/13] Fixing a test --- mysql-test/main/delete_innodb.result | 2 +- mysql-test/main/delete_innodb.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mysql-test/main/delete_innodb.result b/mysql-test/main/delete_innodb.result index e82d162cce8..b9f4c8bdaf5 100644 --- a/mysql-test/main/delete_innodb.result +++ b/mysql-test/main/delete_innodb.result @@ -6,7 +6,7 @@ SET @save_sort_buffer_size= @@sort_buffer_size; SET sort_buffer_size=1024; CREATE TABLE t1(c1 CHAR(255) PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 VALUES (0), ('a'), ('b'); -ANALYZE TABLE T1 PERSISTENT FOR ALL; +ANALYZE TABLE t1 PERSISTENT FOR ALL; Table Op Msg_type Msg_text test.t1 analyze status Engine-independent statistics collected test.t1 analyze status OK diff --git a/mysql-test/main/delete_innodb.test b/mysql-test/main/delete_innodb.test index 17a4e4d98f3..c5c5c5d0172 100644 --- a/mysql-test/main/delete_innodb.test +++ b/mysql-test/main/delete_innodb.test @@ -12,7 +12,7 @@ SET sort_buffer_size=1024; CREATE TABLE t1(c1 CHAR(255) PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 VALUES (0), ('a'), ('b'); -ANALYZE TABLE T1 PERSISTENT FOR ALL; +ANALYZE TABLE t1 PERSISTENT FOR ALL; SELECT * FROM t1; EXPLAIN DELETE b FROM t1 AS a JOIN t1 AS b; DELETE b FROM t1 AS a JOIN t1 AS b; From e9c389c3346752df4bc3b000a16c799f0f8eca13 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Tue, 16 Jun 2020 23:57:51 +0300 Subject: [PATCH 07/13] MDEV-22701 InnoDB: encapsulate trx_sys.mutex and trx_sys.trx_list into a separate class thread_safe_trx_ilist_t: almost generic one UT_LIST was replaced with ilist innobase_kill_query: wrong comment removed. --- storage/innobase/buf/buf0buf.cc | 59 +++++++++++++--------- storage/innobase/handler/ha_innodb.cc | 10 ++-- storage/innobase/include/trx0sys.h | 71 +++++++++++++++++++-------- storage/innobase/include/trx0trx.h | 7 +-- storage/innobase/lock/lock0lock.cc | 16 +++--- storage/innobase/read/read0read.cc | 8 ++- storage/innobase/trx/trx0i_s.cc | 18 +++---- storage/innobase/trx/trx0sys.cc | 21 +++----- storage/innobase/trx/trx0trx.cc | 11 +++-- 9 files changed, 123 insertions(+), 98 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index c4453efc74f..f8fdfb14060 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2063,6 +2063,40 @@ inline void buf_pool_t::write_unlock_all_page_hash() old_page_hash->write_unlock_all(); } +namespace +{ + +struct find_interesting_trx +{ + void operator()(const trx_t &trx) + { + if (trx.state == TRX_STATE_NOT_STARTED) + return; + if (trx.mysql_thd == nullptr) + return; + if (withdraw_started <= trx.start_time) + return; + + if (!found) + { + ib::warn() << "The following trx might hold " + "the blocks in buffer pool to " + "be withdrawn. Buffer pool " + "resizing can complete only " + "after all the transactions " + "below release the blocks."; + found= true; + } + + lock_trx_print_wait_and_mvcc_state(stderr, &trx, current_time); + } + + bool &found; + time_t withdraw_started; + time_t current_time; +}; + +} // namespace /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */ inline void buf_pool_t::resize() @@ -2160,30 +2194,9 @@ withdraw_retry: } lock_mutex_enter(); - mutex_enter(&trx_sys.mutex); bool found = false; - for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys.trx_list); - trx != NULL; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - if (trx->state != TRX_STATE_NOT_STARTED - && trx->mysql_thd != NULL - && withdraw_started > trx->start_time) { - if (!found) { - ib::warn() << - "The following trx might hold" - " the blocks in buffer pool to" - " be withdrawn. Buffer pool" - " resizing can complete only" - " after all the transactions" - " below release the blocks."; - found = true; - } - - lock_trx_print_wait_and_mvcc_state( - stderr, trx, current_time); - } - } - mutex_exit(&trx_sys.mutex); + trx_sys.trx_list.for_each(find_interesting_trx{ + found, withdraw_started, current_time}); lock_mutex_exit(); withdraw_started = current_time; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4e1155836a4..df9f52adb51 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4824,23 +4824,23 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) DBUG_VOID_RETURN; #endif /* WITH_WSREP */ lock_mutex_enter(); - mutex_enter(&trx_sys.mutex); + trx_sys.trx_list.freeze(); trx_mutex_enter(trx); /* It is possible that innobase_close_connection() is concurrently being executed on our victim. Even if the trx object is later reused for another client connection or a background transaction, its trx->mysql_thd will differ from our thd. - trx_t::state changes are protected by trx_t::mutex, and - trx_sys.trx_list is protected by trx_sys.mutex, in - both trx_create() and trx_free(). + trx_sys.trx_list is thread-safe. It's freezed to 'protect' + trx_t. However, trx_t::commit_in_memory() changes a trx_t::state + of autocommit non-locking transactions without any protection. At this point, trx may have been reallocated for another client connection, or for a background operation. In that case, either trx_t::state or trx_t::mysql_thd should not match our expectations. */ bool cancel= trx->mysql_thd == thd && trx->state == TRX_STATE_ACTIVE && !trx->lock.was_chosen_as_deadlock_victim; - mutex_exit(&trx_sys.mutex); + trx_sys.trx_list.unfreeze(); if (!cancel); else if (lock_t *lock= trx->lock.wait_lock) lock_cancel_waiting_and_release(lock); diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 695a077e483..d91e77f9a52 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -41,8 +41,7 @@ Created 3/26/1996 Heikki Tuuri #ifdef WITH_WSREP #include "trx0xa.h" #endif /* WITH_WSREP */ - -typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t; +#include "ilist.h" /** Checks if a page address is the trx sys header page. @param[in] page_id page id @@ -803,6 +802,49 @@ public: } }; +class thread_safe_trx_ilist_t +{ +public: + void create() { mutex_create(LATCH_ID_TRX_SYS, &mutex); } + void close() { mutex_free(&mutex); } + + bool empty() const + { + mutex_enter(&mutex); + auto result= trx_list.empty(); + mutex_exit(&mutex); + return result; + } + + void push_front(trx_t &trx) + { + mutex_enter(&mutex); + trx_list.push_front(trx); + mutex_exit(&mutex); + } + + void remove(trx_t &trx) + { + mutex_enter(&mutex); + trx_list.remove(trx); + mutex_exit(&mutex); + } + + template void for_each(Callable &&callback) const + { + mutex_enter(&mutex); + for (const auto &trx : trx_list) + callback(trx); + mutex_exit(&mutex); + } + + void freeze() const { mutex_enter(&mutex); } + void unfreeze() const { mutex_exit(&mutex); } + +private: + alignas(CACHE_LINE_SIZE) mutable TrxSysMutex mutex; + alignas(CACHE_LINE_SIZE) ilist trx_list; +}; /** The transaction system central memory data structure. */ class trx_sys_t @@ -833,11 +875,8 @@ public: */ MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter rseg_history_len; - /** Mutex protecting trx_list AND NOTHING ELSE. */ - MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex; - /** List of all transactions. */ - MY_ALIGNED(CACHE_LINE_SIZE) trx_ut_list_t trx_list; + thread_safe_trx_ilist_t trx_list; MY_ALIGNED(CACHE_LINE_SIZE) /** Temporary rollback segments */ @@ -978,7 +1017,6 @@ public: void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id, trx_id_t *min_trx_no) { - ut_ad(!mutex_own(&mutex)); snapshot_ids_arg arg(ids); while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id()) @@ -1075,9 +1113,7 @@ public: */ void register_trx(trx_t *trx) { - mutex_enter(&mutex); - UT_LIST_ADD_FIRST(trx_list, trx); - mutex_exit(&mutex); + trx_list.push_front(*trx); } @@ -1088,9 +1124,7 @@ public: */ void deregister_trx(trx_t *trx) { - mutex_enter(&mutex); - UT_LIST_REMOVE(trx_list, trx); - mutex_exit(&mutex); + trx_list.remove(*trx); } @@ -1109,14 +1143,11 @@ public: { size_t count= 0; - mutex_enter(&mutex); - for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx; - trx= UT_LIST_GET_NEXT(trx_list, trx)) - { - if (trx->read_view.is_open()) + trx_list.for_each([&count](const trx_t &trx) { + if (trx.read_view.is_open()) ++count; - } - mutex_exit(&mutex); + }); + return count; } diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 2e6883e44aa..034552fc62e 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -35,6 +35,7 @@ Created 3/26/1996 Heikki Tuuri #include "ut0vec.h" #include "fts0fts.h" #include "read0types.h" +#include "ilist.h" #include #include @@ -713,7 +714,7 @@ struct trx_rsegs_t { trx_temp_undo_t m_noredo; }; -struct trx_t { +struct trx_t : ilist_node<> { private: /** Count of references. @@ -909,10 +910,6 @@ public: /*!< how many tables the current SQL statement uses, except those in consistent read */ - /*------------------------------*/ - UT_LIST_NODE_T(trx_t) trx_list; /*!< list of all transactions; - protected by trx_sys.mutex */ - /*------------------------------*/ dberr_t error_state; /*!< 0 if no error, otherwise error number; NOTE That ONLY the thread doing the transaction is allowed to diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 1ea39da6c4d..53d79b75a35 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4654,15 +4654,14 @@ struct lock_print_info purge_trx(purge_sys.query ? purge_sys.query->trx : NULL) {} - void operator()(const trx_t* trx) const + void operator()(const trx_t &trx) const { - ut_ad(mutex_own(&trx_sys.mutex)); - if (UNIV_UNLIKELY(trx == purge_trx)) + if (UNIV_UNLIKELY(&trx == purge_trx)) return; - lock_trx_print_wait_and_mvcc_state(file, trx, now); + lock_trx_print_wait_and_mvcc_state(file, &trx, now); - if (trx->will_lock && srv_print_innodb_lock_monitor) - lock_trx_print_locks(file, trx); + if (trx.will_lock && srv_print_innodb_lock_monitor) + lock_trx_print_locks(file, &trx); } FILE* const file; @@ -4682,11 +4681,8 @@ lock_print_info_all_transactions( ut_ad(lock_mutex_own()); fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); - const time_t now = time(NULL); - mutex_enter(&trx_sys.mutex); - ut_list_map(trx_sys.trx_list, lock_print_info(file, now)); - mutex_exit(&trx_sys.mutex); + trx_sys.trx_list.for_each(lock_print_info(file, time(nullptr))); lock_mutex_exit(); ut_ad(lock_validate()); diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc index e1d93d74a09..92504546130 100644 --- a/storage/innobase/read/read0read.cc +++ b/storage/innobase/read/read0read.cc @@ -245,10 +245,8 @@ void ReadView::open(trx_t *trx) void trx_sys_t::clone_oldest_view(ReadViewBase *view) const { view->snapshot(nullptr); - mutex_enter(&mutex); /* Find oldest view. */ - for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx; - trx= UT_LIST_GET_NEXT(trx_list, trx)) - trx->read_view.append_to(view); - mutex_exit(&mutex); + trx_list.for_each([view](const trx_t &trx) { + trx.read_view.append_to(view); + }); } diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc index 66aa6a679cb..8e46057f654 100644 --- a/storage/innobase/trx/trx0i_s.cc +++ b/storage/innobase/trx/trx0i_s.cc @@ -1220,19 +1220,13 @@ static void fetch_data_into_cache(trx_i_s_cache_t *cache) trx_i_s_cache_clear(cache); /* Capture the state of transactions */ - mutex_enter(&trx_sys.mutex); - for (const trx_t *trx= UT_LIST_GET_FIRST(trx_sys.trx_list); - trx != NULL; - trx= UT_LIST_GET_NEXT(trx_list, trx)) - { - if (trx_is_started(trx) && trx != purge_sys.query->trx) + trx_sys.trx_list.for_each([cache](const trx_t &trx) { + if (!cache->is_truncated && trx_is_started(&trx) && + &trx != purge_sys.query->trx) { - fetch_data_into_cache_low(cache, trx); - if (cache->is_truncated) - break; - } - } - mutex_exit(&trx_sys.mutex); + fetch_data_into_cache_low(cache, &trx); + } + }); cache->is_truncated= false; } diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 45d1c81621b..52b344498b1 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -205,8 +205,7 @@ trx_sys_t::create() ut_ad(this == &trx_sys); ut_ad(!is_initialised()); m_initialised = true; - mutex_create(LATCH_ID_TRX_SYS, &mutex); - UT_LIST_INIT(trx_list, &trx_t::trx_list); + trx_list.create(); rseg_history_len= 0; rw_trx_hash.init(); @@ -320,8 +319,8 @@ trx_sys_t::close() } } - ut_a(UT_LIST_GET_LEN(trx_list) == 0); - mutex_free(&mutex); + ut_a(trx_list.empty()); + trx_list.close(); m_initialised = false; } @@ -330,15 +329,11 @@ ulint trx_sys_t::any_active_transactions() { uint32_t total_trx= 0; - mutex_enter(&mutex); - for (trx_t* trx= UT_LIST_GET_FIRST(trx_sys.trx_list); - trx != NULL; - trx= UT_LIST_GET_NEXT(trx_list, trx)) - { - if (trx->state == TRX_STATE_COMMITTED_IN_MEMORY || - (trx->state == TRX_STATE_ACTIVE && trx->id)) + trx_sys.trx_list.for_each([&total_trx](const trx_t &trx) { + if (trx.state == TRX_STATE_COMMITTED_IN_MEMORY || + (trx.state == TRX_STATE_ACTIVE && trx.id)) total_trx++; - } - mutex_exit(&mutex); + }); + return total_trx; } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 51b9bd379a5..e44b4e497d1 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -977,8 +977,9 @@ trx_start_low( ut_a(trx->lock.table_locks.empty()); /* No other thread can access this trx object through rw_trx_hash, - still it can be found through trx_sys.trx_list, which means state - change must be protected by e.g. trx->mutex. + still it can be found through trx_sys.trx_list. Sometimes it's + possible to indirectly protect trx_t::state by freezing + trx_sys.trx_list. For now we update it without mutex protection, because original code did it this way. It has to be reviewed and fixed properly. */ @@ -1343,9 +1344,9 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr) /* This state change is not protected by any mutex, therefore there is an inherent race here around state transition during printouts. We ignore this race for the sake of efficiency. - However, the trx_sys_t::mutex will protect the trx_t instance - and it cannot be removed from the trx_list and freed - without first acquiring the trx_sys_t::mutex. */ + However, the freezing of trx_sys.trx_list will protect the trx_t + instance and it cannot be removed from the trx_list and freed + without first unfreezing trx_list. */ ut_ad(trx_state_eq(this, TRX_STATE_ACTIVE)); MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT); From d712956526236867de3239c18b789f5c7b497ac7 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Wed, 13 May 2020 01:45:54 +0300 Subject: [PATCH 08/13] MDEV-19749 MDL scalability regression after backup locks use ilist instread of I_P_List because it's generally slightly faster on inserting, removing and iterating --- sql/mdl.cc | 175 ++++++++++++++++++-------------------------- sql/mdl.h | 12 +-- sql/wsrep_mysqld.cc | 3 +- sql/wsrep_mysqld.h | 3 +- 4 files changed, 78 insertions(+), 115 deletions(-) diff --git a/sql/mdl.cc b/sql/mdl.cc index 287e0cb5f65..51387fb0f9a 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -1,4 +1,5 @@ /* Copyright (c) 2007, 2012, Oracle and/or its affiliates. + Copyright (c) 2020, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,6 +28,7 @@ #include #include #include +#include static PSI_memory_key key_memory_MDL_context_acquire_locks; @@ -340,21 +342,16 @@ public: class Ticket_list { + using List= ilist; public: - typedef I_P_List, - I_P_List_null_counter, - I_P_List_fast_push_back > - List; - operator const List &() const { return m_list; } Ticket_list() :m_bitmap(0) {} void add_ticket(MDL_ticket *ticket); void remove_ticket(MDL_ticket *ticket); - bool is_empty() const { return m_list.is_empty(); } + bool is_empty() const { return m_list.empty(); } bitmap_t bitmap() const { return m_bitmap; } + List::const_iterator begin() const { return m_list.begin(); } + List::const_iterator end() const { return m_list.end(); } private: void clear_bit_if_not_in_list(enum_mdl_type type); private: @@ -364,8 +361,6 @@ public: bitmap_t m_bitmap; }; - typedef Ticket_list::List::Iterator Ticket_iterator; - /** Helper struct which defines how different types of locks are handled @@ -574,14 +569,12 @@ public: { return m_strategy->needs_notification(ticket); } void notify_conflicting_locks(MDL_context *ctx) { - Ticket_iterator it(m_granted); - MDL_ticket *conflicting_ticket; - while ((conflicting_ticket= it++)) + for (const auto &conflicting_ticket : m_granted) { - if (conflicting_ticket->get_ctx() != ctx && - m_strategy->conflicting_locks(conflicting_ticket)) + if (conflicting_ticket.get_ctx() != ctx && + m_strategy->conflicting_locks(&conflicting_ticket)) { - MDL_context *conflicting_ctx= conflicting_ticket->get_ctx(); + MDL_context *conflicting_ctx= conflicting_ticket.get_ctx(); ctx->get_owner()-> notify_shared_lock(conflicting_ctx->get_owner(), @@ -723,21 +716,21 @@ struct mdl_iterate_arg static my_bool mdl_iterate_lock(MDL_lock *lock, mdl_iterate_arg *arg) { - int res= FALSE; /* We can skip check for m_strategy here, becase m_granted must be empty for such locks anyway. */ mysql_prlock_rdlock(&lock->m_rwlock); - MDL_lock::Ticket_iterator granted_it(lock->m_granted); - MDL_lock::Ticket_iterator waiting_it(lock->m_waiting); - MDL_ticket *ticket; - while ((ticket= granted_it++) && !(res= arg->callback(ticket, arg->argument, true))) - /* no-op */; - while ((ticket= waiting_it++) && !(res= arg->callback(ticket, arg->argument, false))) - /* no-op */; + bool res= std::any_of(lock->m_granted.begin(), lock->m_granted.end(), + [arg](MDL_ticket &ticket) { + return arg->callback(&ticket, arg->argument, true); + }); + res= std::any_of(lock->m_waiting.begin(), lock->m_waiting.end(), + [arg](MDL_ticket &ticket) { + return arg->callback(&ticket, arg->argument, false); + }); mysql_prlock_unlock(&lock->m_rwlock); - return MY_TEST(res); + return res; } @@ -1214,11 +1207,8 @@ MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout, void MDL_lock::Ticket_list::clear_bit_if_not_in_list(enum_mdl_type type) { - MDL_lock::Ticket_iterator it(m_list); - const MDL_ticket *ticket; - - while ((ticket= it++)) - if (ticket->get_type() == type) + for (const auto &ticket : m_list) + if (ticket.get_type() == type) return; m_bitmap&= ~ MDL_BIT(type); } @@ -1241,30 +1231,15 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) if ((this == &(ticket->get_lock()->m_waiting)) && wsrep_thd_is_BF(ticket->get_ctx()->get_thd(), false)) { - Ticket_iterator itw(ticket->get_lock()->m_waiting); - MDL_ticket *waiting; - MDL_ticket *prev=NULL; - bool added= false; - DBUG_ASSERT(WSREP(ticket->get_ctx()->get_thd())); - while ((waiting= itw++) && !added) - { - if (!wsrep_thd_is_BF(waiting->get_ctx()->get_thd(), true)) - { - WSREP_DEBUG("MDL add_ticket inserted before: %lu %s", - thd_get_thread_id(waiting->get_ctx()->get_thd()), - wsrep_thd_query(waiting->get_ctx()->get_thd())); - /* Insert the ticket before the first non-BF waiting thd. */ - m_list.insert_after(prev, ticket); - added= true; - } - prev= waiting; - } - - /* Otherwise, insert the ticket at the back of the waiting list. */ - if (!added) - m_list.push_back(ticket); + m_list.insert(std::find_if(ticket->get_lock()->m_waiting.begin(), + ticket->get_lock()->m_waiting.end(), + [](const MDL_ticket &waiting) { + return !wsrep_thd_is_BF( + waiting.get_ctx()->get_thd(), true); + }), + *ticket); } else #endif /* WITH_WSREP */ @@ -1273,7 +1248,7 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) Add ticket to the *back* of the queue to ensure fairness among requests with the same priority. */ - m_list.push_back(ticket); + m_list.push_back(*ticket); } m_bitmap|= MDL_BIT(ticket->get_type()); } @@ -1286,7 +1261,7 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) void MDL_lock::Ticket_list::remove_ticket(MDL_ticket *ticket) { - m_list.remove(ticket); + m_list.remove(*ticket); /* Check if waiting queue has another ticket with the same type as one which was removed. If there is no such ticket, i.e. we have @@ -1314,8 +1289,6 @@ void MDL_lock::Ticket_list::remove_ticket(MDL_ticket *ticket) void MDL_lock::reschedule_waiters() { - MDL_lock::Ticket_iterator it(m_waiting); - MDL_ticket *ticket; bool skip_high_priority= false; bitmap_t hog_lock_types= hog_lock_types_bitmap(); @@ -1370,20 +1343,20 @@ void MDL_lock::reschedule_waiters() grant SNRW lock and there are no pending S or SH locks. */ - while ((ticket= it++)) + for (auto it= m_waiting.begin(); it != m_waiting.end(); ++it) { /* Skip high-prio, strong locks if earlier we have decided to give way to low-prio, weaker locks. */ if (skip_high_priority && - ((MDL_BIT(ticket->get_type()) & hog_lock_types) != 0)) + ((MDL_BIT(it->get_type()) & hog_lock_types) != 0)) continue; - if (can_grant_lock(ticket->get_type(), ticket->get_ctx(), + if (can_grant_lock(it->get_type(), it->get_ctx(), skip_high_priority)) { - if (! ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED)) + if (!it->get_ctx()->m_wait.set_status(MDL_wait::GRANTED)) { /* Satisfy the found request by updating lock structures. @@ -1393,15 +1366,19 @@ void MDL_lock::reschedule_waiters() when manages to do so, already sees an updated state of the MDL_lock object. */ - m_waiting.remove_ticket(ticket); - m_granted.add_ticket(ticket); + auto prev_it= std::prev(it); // this might be begin()-- but the hack + // works because list is circular + m_waiting.remove_ticket(&*it); + m_granted.add_ticket(&*it); /* Increase counter of successively granted high-priority strong locks, if we have granted one. */ - if ((MDL_BIT(ticket->get_type()) & hog_lock_types) != 0) + if ((MDL_BIT(it->get_type()) & hog_lock_types) != 0) m_hog_lock_count++; + + it= prev_it; } /* If we could not update the wait slot of the waiter, @@ -1774,14 +1751,13 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, if (m_granted.bitmap() & granted_incompat_map) { - Ticket_iterator it(m_granted); bool can_grant= true; /* Check that the incompatible lock belongs to some other context. */ - while (auto ticket= it++) + for (const auto &ticket : m_granted) { - if (ticket->get_ctx() != requestor_ctx && - ticket->is_incompatible_when_granted(type_arg)) + if (ticket.get_ctx() != requestor_ctx && + ticket.is_incompatible_when_granted(type_arg)) { can_grant= false; #ifdef WITH_WSREP @@ -1793,12 +1769,12 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, requestor_ctx->get_thd()->wsrep_cs().mode() == wsrep::client_state::m_rsu) { - wsrep_handle_mdl_conflict(requestor_ctx, ticket, &key); + wsrep_handle_mdl_conflict(requestor_ctx, &ticket, &key); if (wsrep_log_conflicts) { - auto key= ticket->get_key(); + auto key= ticket.get_key(); WSREP_INFO("MDL conflict db=%s table=%s ticket=%d solved by abort", - key->db_name(), key->name(), ticket->get_type()); + key->db_name(), key->name(), ticket.get_type()); } continue; } @@ -1820,12 +1796,10 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, inline unsigned long MDL_lock::get_lock_owner() const { - Ticket_iterator it(m_granted); - MDL_ticket *ticket; + if (m_granted.is_empty()) + return 0; - if ((ticket= it++)) - return ticket->get_ctx()->get_thread_id(); - return 0; + return m_granted.begin()->get_ctx()->get_thread_id(); } @@ -2234,18 +2208,16 @@ MDL_context::clone_ticket(MDL_request *mdl_request) #ifndef DBUG_OFF bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx) { - Ticket_iterator it(m_granted); - MDL_ticket *conflicting_ticket; rpl_group_info *rgi_slave= ctx->get_thd()->rgi_slave; if (!rgi_slave->gtid_sub_id) return 0; - while ((conflicting_ticket= it++)) + for (const auto &conflicting_ticket : m_granted) { - if (conflicting_ticket->get_ctx() != ctx) + if (conflicting_ticket.get_ctx() != ctx) { - MDL_context *conflicting_ctx= conflicting_ticket->get_ctx(); + MDL_context *conflicting_ctx= conflicting_ticket.get_ctx(); rpl_group_info *conflicting_rgi_slave; conflicting_rgi_slave= conflicting_ctx->get_thd()->rgi_slave; @@ -2622,16 +2594,11 @@ MDL_context::upgrade_shared_lock(MDL_ticket *mdl_ticket, bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket, MDL_wait_for_graph_visitor *gvisitor) { - MDL_ticket *ticket; MDL_context *src_ctx= waiting_ticket->get_ctx(); bool result= TRUE; mysql_prlock_rdlock(&m_rwlock); - /* Must be initialized after taking a read lock. */ - Ticket_iterator granted_it(m_granted); - Ticket_iterator waiting_it(m_waiting); - /* MDL_lock's waiting and granted queues and MDL_context::m_waiting_for member are updated by different threads when the lock is granted @@ -2698,46 +2665,44 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket, node. In workloads that involve wait-for graph loops this has proven to be a more efficient strategy [citation missing]. */ - while ((ticket= granted_it++)) + for (const auto& ticket : m_granted) { /* Filter out edges that point to the same node. */ - if (ticket->get_ctx() != src_ctx && - ticket->is_incompatible_when_granted(waiting_ticket->get_type()) && - gvisitor->inspect_edge(ticket->get_ctx())) + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_granted(waiting_ticket->get_type()) && + gvisitor->inspect_edge(ticket.get_ctx())) { goto end_leave_node; } } - while ((ticket= waiting_it++)) + for (const auto &ticket : m_waiting) { /* Filter out edges that point to the same node. */ - if (ticket->get_ctx() != src_ctx && - ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) && - gvisitor->inspect_edge(ticket->get_ctx())) + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_waiting(waiting_ticket->get_type()) && + gvisitor->inspect_edge(ticket.get_ctx())) { goto end_leave_node; } } /* Recurse and inspect all adjacent nodes. */ - granted_it.rewind(); - while ((ticket= granted_it++)) + for (const auto &ticket : m_granted) { - if (ticket->get_ctx() != src_ctx && - ticket->is_incompatible_when_granted(waiting_ticket->get_type()) && - ticket->get_ctx()->visit_subgraph(gvisitor)) + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_granted(waiting_ticket->get_type()) && + ticket.get_ctx()->visit_subgraph(gvisitor)) { goto end_leave_node; } } - waiting_it.rewind(); - while ((ticket= waiting_it++)) + for (const auto &ticket : m_waiting) { - if (ticket->get_ctx() != src_ctx && - ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) && - ticket->get_ctx()->visit_subgraph(gvisitor)) + if (ticket.get_ctx() != src_ctx && + ticket.is_incompatible_when_waiting(waiting_ticket->get_type()) && + ticket.get_ctx()->visit_subgraph(gvisitor)) { goto end_leave_node; } @@ -3286,7 +3251,7 @@ const char *wsrep_get_mdl_namespace_name(MDL_key::enum_mdl_namespace ns) return "UNKNOWN"; } -void MDL_ticket::wsrep_report(bool debug) +void MDL_ticket::wsrep_report(bool debug) const { if (!debug) return; diff --git a/sql/mdl.h b/sql/mdl.h index 4659238e9f2..dd10b3a45d0 100644 --- a/sql/mdl.h +++ b/sql/mdl.h @@ -1,6 +1,7 @@ #ifndef MDL_H #define MDL_H /* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2020, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +17,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ #include "sql_plist.h" +#include "ilist.h" #include #include #include @@ -685,7 +687,7 @@ public: threads/contexts. */ -class MDL_ticket : public MDL_wait_for_subgraph +class MDL_ticket : public MDL_wait_for_subgraph, public ilist_node<> { public: /** @@ -694,15 +696,9 @@ public: */ MDL_ticket *next_in_context; MDL_ticket **prev_in_context; - /** - Pointers for participating in the list of satisfied/pending requests - for the lock. Externally accessible. - */ - MDL_ticket *next_in_lock; - MDL_ticket **prev_in_lock; public: #ifdef WITH_WSREP - void wsrep_report(bool debug); + void wsrep_report(bool debug) const; #endif /* WITH_WSREP */ bool has_pending_conflicting_lock() const; diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index e818d577768..0c31631d19b 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1,4 +1,5 @@ /* Copyright 2008-2015 Codership Oy + Copyright (c) 2020, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2368,7 +2369,7 @@ void wsrep_to_isolation_end(THD *thd) */ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, - MDL_ticket *ticket, + const MDL_ticket *ticket, const MDL_key *key) { /* Fallback to the non-wsrep behaviour */ diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 6d1ff1d1ed0..3347a94cdd6 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -1,4 +1,5 @@ /* Copyright 2008-2017 Codership Oy + Copyright (c) 2020, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -527,7 +528,7 @@ void wsrep_keys_free(wsrep_key_arr_t* key_arr); extern void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, - MDL_ticket *ticket, + const MDL_ticket *ticket, const MDL_key *key); enum wsrep_thread_type { From dbe15e9e5a57d3212f8fd3d78c5cebd22d180853 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Fri, 15 May 2020 18:39:05 +0300 Subject: [PATCH 09/13] MDEV-19749 MDL scalability regression after backup locks MDL_lock::Ticket_list::remove_ticket(): reduce algoritmic complexity from O(N) to O(1) MDL_lock::Ticket_list::clear_bit_if_not_in_list(): removed MDL_lock::Ticket_list::m_type_counters: a map of ticket type to count. Initialization is memset(0) which takes time. --- sql/mdl.cc | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/sql/mdl.cc b/sql/mdl.cc index 51387fb0f9a..6cdea8c3ebd 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -29,6 +29,7 @@ #include #include #include +#include static PSI_memory_key key_memory_MDL_context_acquire_locks; @@ -344,7 +345,7 @@ public: { using List= ilist; public: - Ticket_list() :m_bitmap(0) {} + Ticket_list() :m_bitmap(0) { m_type_counters.fill(0); } void add_ticket(MDL_ticket *ticket); void remove_ticket(MDL_ticket *ticket); @@ -352,13 +353,12 @@ public: bitmap_t bitmap() const { return m_bitmap; } List::const_iterator begin() const { return m_list.begin(); } List::const_iterator end() const { return m_list.end(); } - private: - void clear_bit_if_not_in_list(enum_mdl_type type); private: /** List of tickets. */ List m_list; /** Bitmap of types of tickets in this list. */ bitmap_t m_bitmap; + std::array m_type_counters; // hash table }; @@ -1196,24 +1196,6 @@ MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout, } -/** - Clear bit corresponding to the type of metadata lock in bitmap representing - set of such types if list of tickets does not contain ticket with such type. - - @param[in,out] bitmap Bitmap representing set of types of locks. - @param[in] list List to inspect. - @param[in] type Type of metadata lock to look up in the list. -*/ - -void MDL_lock::Ticket_list::clear_bit_if_not_in_list(enum_mdl_type type) -{ - for (const auto &ticket : m_list) - if (ticket.get_type() == type) - return; - m_bitmap&= ~ MDL_BIT(type); -} - - /** Add ticket to MDL_lock's list of waiting requests and update corresponding bitmap of lock types. @@ -1251,6 +1233,7 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) m_list.push_back(*ticket); } m_bitmap|= MDL_BIT(ticket->get_type()); + m_type_counters[ticket->get_type()]++; } @@ -1267,12 +1250,9 @@ void MDL_lock::Ticket_list::remove_ticket(MDL_ticket *ticket) one which was removed. If there is no such ticket, i.e. we have removed last ticket of particular type, then we need to update bitmap of waiting ticket's types. - Note that in most common case, i.e. when shared lock is removed - from waiting queue, we are likely to find ticket of the same - type early without performing full iteration through the list. - So this method should not be too expensive. */ - clear_bit_if_not_in_list(ticket->get_type()); + if (--m_type_counters[ticket->get_type()] == 0) + m_bitmap&= ~MDL_BIT(ticket->get_type()); } From 89c3b087a39933f1b9f01c22fc67bfe0f54ea50b Mon Sep 17 00:00:00 2001 From: Vincent Milum Jr Date: Tue, 11 Feb 2020 10:27:59 -0800 Subject: [PATCH 10/13] MDEV-21709 ZFS snapdir=visible breaks Galera rsync SST replcation Fix for Galera rsync SST with the specific conditions listed in MDEV-21709 --- scripts/wsrep_sst_rsync.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 5403b9e8505..5de985237fc 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -200,6 +200,7 @@ fi # New filter - exclude everything except dirs (schemas) and innodb files FILTER="-f '- /lost+found' + -f '- /.zfs' -f '- /.fseventsd' -f '- /.Trashes' -f '+ /wsrep_sst_binlog.tar' @@ -357,7 +358,7 @@ EOF [ "$OS" = "Linux" ] && count=$(grep -c processor /proc/cpuinfo) [ "$OS" = "Darwin" -o "$OS" = "FreeBSD" ] && count=$(sysctl -n hw.ncpu) - find . -maxdepth 1 -mindepth 1 -type d -not -name "lost+found" \ + find . -maxdepth 1 -mindepth 1 -type d -not -name "lost+found" -not -name ".zfs" \ -print0 | xargs -I{} -0 -P $count \ rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ From d09dd5e86c1ce77cf00858fb5628c3745a1213a5 Mon Sep 17 00:00:00 2001 From: Vincent Milum Jr Date: Tue, 11 Feb 2020 15:35:51 -0800 Subject: [PATCH 11/13] Exclude needs to be on receiving side too Exclude needs to be on receiving side too It is possible that other excludes may need to be added to the receiving side too that exist on the sending side. --- scripts/wsrep_sst_rsync.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 5de985237fc..96e542ce844 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -441,6 +441,7 @@ timeout = 300 $SILENT [$MODULE] path = $WSREP_SST_OPT_DATA + exclude = .zfs [$MODULE-log_dir] path = $WSREP_LOG_DIR [$MODULE-data_dir] From d1bb7f9143ceafe9cbdc1e53c4ffd4abe232cb97 Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 24 Jun 2020 12:51:55 +0300 Subject: [PATCH 12/13] Fixed memory leak in item_sum.cc::report_cut_value_error() Only affects DBUG builds --- sql/item_sum.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index e79344507b2..5573cc132f0 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -3737,13 +3737,16 @@ static void report_cut_value_error(THD *thd, uint row_count, const char *fname) { size_t fn_len= strlen(fname); char *fname_upper= (char *) my_alloca(fn_len + 1); - fname_upper[fn_len]= 0; - for (; fn_len; fn_len--) - fname_upper[fn_len-1]= my_toupper(&my_charset_latin1, fname[fn_len-1]); + if (!fname_upper) + fname_upper= (char*) fname; // Out of memory + else + memcpy(fname_upper, fname, fn_len+1); + my_caseup_str(&my_charset_latin1, fname_upper); push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_CUT_VALUE_GROUP_CONCAT, ER_THD(thd, ER_CUT_VALUE_GROUP_CONCAT), row_count, fname_upper); + my_afree(fname_upper); } From 23c53df4b71b0b7d00741129acc2b52238e9baef Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Wed, 24 Jun 2020 06:05:16 -0400 Subject: [PATCH 13/13] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1f93120f84d..eb704d39d4a 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=4 +MYSQL_VERSION_PATCH=5 SERVER_MATURITY=stable