From f7780a8eb8aaa7e9ea9b1bb5b3b214b07eb4190f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 25 Aug 2023 13:41:54 +0300 Subject: [PATCH] MDEV-30100: Assertion purge_sys.tail.trx_no <= purge_sys.rseg->last_trx_no() trx_t::commit_empty(): A special case of transaction "commit" when the transaction was actually rolled back or the persistent undo log is empty. In this case, we need to change the undo log header state to TRX_UNDO_CACHED and move the undo log from rseg->undo_list to rseg->undo_cached for fast reuse. Furthermore, unless this is the only undo log record in the page, we will remove the record and rewind TRX_UNDO_PAGE_START, TRX_UNDO_PAGE_FREE, TRX_UNDO_LAST_LOG. We must also ensure that the system-wide transaction identifier will be persisted up to this->id, so that there will not be warnings or errors due to a PAGE_MAX_TRX_ID being too large. We might have modified secondary index pages before being rolled back, and any changes of PAGE_MAX_TRX_ID are never rolled back. Even though it is not going to be written persistently anywhere, we will invoke trx_sys.assign_new_trx_no(this), so that in the test innodb.instant_alter everything will be purged as expected. trx_t::write_serialisation_history(): Renamed from trx_write_serialisation_history(). If there is no undo log, invoke commit_empty(). trx_purge_add_undo_to_history(): Simplify an assertion and remove a comment. This function will not be invoked on an empty undo log anymore. trx_undo_header_create(): Add a debug assertion. trx_undo_mem_create_at_db_start(): Remove a duplicated assignment. 
Reviewed by: Vladislav Lesin Tested by: Matthias Leich --- .../suite/innodb/r/instant_alter_debug.result | 2 +- .../suite/innodb/t/instant_alter_debug.test | 2 +- storage/innobase/include/trx0trx.h | 10 +- storage/innobase/trx/trx0purge.cc | 12 +- storage/innobase/trx/trx0trx.cc | 171 ++++++++++++++++-- storage/innobase/trx/trx0undo.cc | 5 +- 6 files changed, 172 insertions(+), 30 deletions(-) diff --git a/mysql-test/suite/innodb/r/instant_alter_debug.result b/mysql-test/suite/innodb/r/instant_alter_debug.result index 0b6d44aedc3..7c378aa62e3 100644 --- a/mysql-test/suite/innodb/r/instant_alter_debug.result +++ b/mysql-test/suite/innodb/r/instant_alter_debug.result @@ -182,7 +182,7 @@ ROLLBACK; connection stop_purge; COMMIT; connection default; -InnoDB 2 transactions not purged +InnoDB 1 transactions not purged SET DEBUG_SYNC='now SIGNAL logged'; connection ddl; connection default; diff --git a/mysql-test/suite/innodb/t/instant_alter_debug.test b/mysql-test/suite/innodb/t/instant_alter_debug.test index 917226a6c26..11d6961f918 100644 --- a/mysql-test/suite/innodb/t/instant_alter_debug.test +++ b/mysql-test/suite/innodb/t/instant_alter_debug.test @@ -200,7 +200,7 @@ COMMIT; connection default; # Wait for purge to empty the table. -let $wait_all_purged=2; +let $wait_all_purged=1; --source include/wait_all_purged.inc let $wait_all_purged=0; diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 38895883607..36dbde99c68 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -959,11 +959,19 @@ private: /** Commit the transaction in a mini-transaction. @param mtr mini-transaction (if there are any persistent modifications) */ void commit_low(mtr_t *mtr= nullptr); + /** Commit an empty transaction. + @param mtr mini-transaction */ + void commit_empty(mtr_t *mtr); + /** Commit an empty transaction. 
+ @param mtr mini-transaction */ + /** Assign the transaction its history serialisation number and write the + UNDO log to the assigned rollback segment. + @param mtr mini-transaction */ + inline void write_serialisation_history(mtr_t *mtr); public: /** Commit the transaction. */ void commit(); - /** Try to drop a persistent table. @param table persistent table @param fk whether to drop FOREIGN KEY metadata diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 9c464901e14..b1960e58924 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -307,11 +307,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) undo= nullptr; - /* After the purge thread has been given permission to exit, - we may roll back transactions (trx->undo_no==0) - in THD::cleanup() invoked from unlink_thd() in fast shutdown, - or in trx_rollback_recovered() in slow shutdown. - + /* Before any transaction-generating background threads or the purge have been started, we can start transactions in row_merge_drop_temp_indexes(), and roll back recovered transactions. @@ -323,12 +319,10 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) During fast shutdown, we may also continue to execute user transactions. 
*/ - ut_ad(srv_undo_sources || trx->undo_no == 0 || + ut_ad(srv_undo_sources || srv_fast_shutdown || (!purge_sys.enabled() && (srv_is_being_started || - trx_rollback_is_active || - srv_force_recovery >= SRV_FORCE_NO_BACKGROUND)) || - srv_fast_shutdown); + srv_force_recovery >= SRV_FORCE_NO_BACKGROUND))); #ifdef WITH_WSREP if (wsrep_is_wsrep_xid(&trx->xid)) diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index ebffd87647d..75bbd9889c1 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -968,15 +968,150 @@ trx_start_low( ut_a(trx->error_state == DB_SUCCESS); } +/** Release an empty undo log that was associated with a transaction. */ +ATTRIBUTE_COLD +void trx_t::commit_empty(mtr_t *mtr) +{ + trx_rseg_t *rseg= rsegs.m_redo.rseg; + trx_undo_t *&undo= rsegs.m_redo.undo; + + ut_ad(undo->state == TRX_UNDO_ACTIVE || undo->state == TRX_UNDO_PREPARED); + ut_ad(undo->size == 1); + + if (buf_block_t *u= + buf_page_get(page_id_t(rseg->space->id, undo->hdr_page_no), 0, + RW_X_LATCH, mtr)) + { + ut_d(const uint16_t state= + mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame)); + ut_ad(state == undo->state || state == TRX_UNDO_ACTIVE); + static_assert(TRX_UNDO_PAGE_START + 2 == TRX_UNDO_PAGE_FREE, + "compatibility"); + ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + u->page.frame, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + u->page.frame, 2)); + ut_ad(mach_read_from_4(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + + FIL_ADDR_PAGE + u->page.frame) == FIL_NULL); + ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + + FIL_ADDR_BYTE + u->page.frame) == 0); + ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + + u->page.frame, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_NEXT + + u->page.frame, FIL_ADDR_SIZE)); + + /* Delete the last undo log header, which must be for this transaction. 
+ + An undo segment can be reused (TRX_UNDO_CACHED) only if it + consists of one page and that single page contains enough space + for the undo log header of a subsequent transaction. See + trx_purge_add_undo_to_history(), which is executed when committing + a nonempty transaction. + + If we simply changed the undo page state to TRX_UNDO_CACHED, + then trx_undo_reuse_cached() could run out of space. We will + release the space consumed by our empty undo log to avoid that. */ + for (byte *last= &u->page.frame[TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE], + *prev= nullptr;;) + { + /* TRX_UNDO_PREV_LOG is only being read in debug assertions, and + written in trx_undo_header_create(). To remain compatible with + possibly corrupted old data files, we will not read the field + TRX_UNDO_PREV_LOG but instead rely on TRX_UNDO_NEXT_LOG. */ + ut_ad(mach_read_from_2(TRX_UNDO_PREV_LOG + last) == + (reinterpret_cast<size_t>(prev) & (srv_page_size - 1))); + + if (uint16_t next= mach_read_from_2(TRX_UNDO_NEXT_LOG + last)) + { + ut_ad(ulint{next} + TRX_UNDO_LOG_XA_HDR_SIZE < srv_page_size - 100); + ut_ad(&u->page.frame[next] > last); + ut_ad(mach_read_from_2(TRX_UNDO_LOG_START + last) <= next); + prev= last; + last= &u->page.frame[next]; + continue; + } + + ut_ad(mach_read_from_8(TRX_UNDO_TRX_ID + last) == id); + ut_ad(!mach_read_from_8(TRX_UNDO_TRX_NO + last)); + ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + u->page.frame, + TRX_UNDO_LOG_START + last, 2)); + + if (prev) + { + mtr->memcpy(*u, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + + u->page.frame, prev + TRX_UNDO_LOG_START, 2); + const ulint free= page_offset(last); + mtr->write<2>(*u, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + + u->page.frame, free); + mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame, + TRX_UNDO_CACHED); + mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + u->page.frame, + page_offset(prev)); + mtr->write<2>(*u, prev + TRX_UNDO_NEXT_LOG, 0U); + mtr->memset(u, free, srv_page_size - 
FIL_PAGE_DATA_END - free, 0); + + /* We may have updated PAGE_MAX_TRX_ID on secondary index pages + to this->id. Ensure that trx_sys.m_max_trx_id will be recovered + correctly, even though we removed our undo log record along + with the TRX_UNDO_TRX_ID above. */ + + /* Below, we are acquiring rseg_header->page.lock after + u->page.lock (the opposite of trx_purge_add_undo_to_history()). + This is fine, because both functions are holding exclusive + rseg->latch. */ + + if (mach_read_from_8(prev + TRX_UNDO_TRX_NO) >= id); + else if (buf_block_t *rseg_header= rseg->get(mtr, nullptr)) + { + byte *m= TRX_RSEG + TRX_RSEG_MAX_TRX_ID + rseg_header->page.frame; + + do + { + if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rseg_header->page.frame))) + /* This must have been upgraded from before MariaDB 10.3.5. */ + trx_rseg_format_upgrade(rseg_header, mtr); + else if (mach_read_from_8(m) >= id) + continue; + mtr->write<8>(*rseg_header, m, id); + } + while (0); + } + } + else + /* Our undo log header was right after the undo log segment header. + This page should have been created by trx_undo_create(), not + returned by trx_undo_reuse_cached(). + + We retain the dummy empty log in order to remain compatible with + trx_undo_mem_create_at_db_start(). This page will remain available + to trx_undo_reuse_cached(), and it will eventually be freed by + trx_purge_truncate_rseg_history(). */ + mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame, + TRX_UNDO_CACHED); + break; + } + } + else + ut_ad("undo log page was not found" == 0); + + UT_LIST_REMOVE(rseg->undo_list, undo); + UT_LIST_ADD_FIRST(rseg->undo_cached, undo); + undo->state= TRX_UNDO_CACHED; + undo= nullptr; + + /* We must assign an "end" identifier even though we are not going + to persistently write it anywhere, to make sure that the purge of + history will not be stuck. 
*/ + trx_sys.assign_new_trx_no(this); +} + /** Assign the transaction its history serialisation number and write the UNDO log to the assigned rollback segment. -@param trx persistent transaction @param mtr mini-transaction */ -static void trx_write_serialisation_history(trx_t *trx, mtr_t *mtr) +inline void trx_t::write_serialisation_history(mtr_t *mtr) { - ut_ad(!trx->read_only); - trx_rseg_t *rseg= trx->rsegs.m_redo.rseg; - trx_undo_t *&undo= trx->rsegs.m_redo.undo; + ut_ad(!read_only); + trx_rseg_t *rseg= rsegs.m_redo.rseg; + trx_undo_t *&undo= rsegs.m_redo.undo; if (UNIV_LIKELY(undo != nullptr)) { MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); @@ -988,26 +1123,32 @@ static void trx_write_serialisation_history(trx_t *trx, mtr_t *mtr) ut_ad(undo->rseg == rseg); /* Assign the transaction serialisation number and add any undo log to the purge queue. */ - if (rseg->last_page_no == FIL_NULL) + if (UNIV_UNLIKELY(!undo_no)) + { + /* The transaction was rolled back. */ + commit_empty(mtr); + goto done; + } + else if (rseg->last_page_no == FIL_NULL) { mysql_mutex_lock(&purge_sys.pq_mutex); - trx_sys.assign_new_trx_no(trx); - const trx_id_t end{trx->rw_trx_hash_element->no}; - /* If the rollback segment is not empty, trx->no cannot be less - than any trx_t::no already in rseg. User threads only produce - events when a rollback segment is empty. */ + trx_sys.assign_new_trx_no(this); + const trx_id_t end{rw_trx_hash_element->no}; + /* end cannot be less than anything in rseg. User threads only + produce events when a rollback segment is empty. 
*/ purge_sys.purge_queue.push(TrxUndoRsegs{end, *rseg}); mysql_mutex_unlock(&purge_sys.pq_mutex); rseg->last_page_no= undo->hdr_page_no; rseg->set_last_commit(undo->hdr_offset, end); } else - trx_sys.assign_new_trx_no(trx); + trx_sys.assign_new_trx_no(this); UT_LIST_REMOVE(rseg->undo_list, undo); /* Change the undo log segment state from TRX_UNDO_ACTIVE, to define the transaction as committed in the file based domain, at mtr->commit_lsn() obtained in mtr->commit() below. */ - trx_purge_add_undo_to_history(trx, undo, mtr); + trx_purge_add_undo_to_history(this, undo, mtr); + done: rseg->release(); rseg->latch.wr_unlock(); } @@ -1218,7 +1359,7 @@ ATTRIBUTE_NOINLINE static void trx_commit_cleanup(trx_undo_t *&undo) TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) { - /* We already detached from rseg in trx_write_serialisation_history() */ + /* We already detached from rseg in write_serialisation_history() */ ut_ad(!rsegs.m_redo.undo); read_view.close(); @@ -1409,7 +1550,7 @@ TRANSACTIONAL_TARGET void trx_t::commit_low(mtr_t *mtr) different rollback segments. However, if a transaction T2 is able to see modifications made by a transaction T1, T2 will always get a bigger transaction number and a bigger commit lsn than T1. 
*/ - trx_write_serialisation_history(this, mtr); + write_serialisation_history(mtr); } else if (trx_rseg_t *rseg= rsegs.m_redo.rseg) { diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 8354d04edc6..657a7c48ff1 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -497,8 +497,7 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id, ut_ad(slot_no < TRX_RSEG_N_SLOTS); - *err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, - mtr); + *err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, mtr); if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { return NULL; } @@ -569,6 +568,7 @@ static uint16_t trx_undo_header_create(buf_block_t *undo_page, trx_id_t trx_id, start, 2); uint16_t prev_log= mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + undo_page->page.frame); + ut_ad(prev_log < free); alignas(4) byte buf[4]; mach_write_to_2(buf, TRX_UNDO_ACTIVE); mach_write_to_2(buf + 2, free); @@ -1022,7 +1022,6 @@ corrupted_type: case TRX_UNDO_ACTIVE: case TRX_UNDO_PREPARED: if (UNIV_LIKELY(type != 1)) { - trx_no = trx_id + 1; break; } sql_print_error("InnoDB: upgrade from older version than"