diff --git a/mysql-test/suite/innodb/r/instant_alter_debug.result b/mysql-test/suite/innodb/r/instant_alter_debug.result index 0b6d44aedc3..7c378aa62e3 100644 --- a/mysql-test/suite/innodb/r/instant_alter_debug.result +++ b/mysql-test/suite/innodb/r/instant_alter_debug.result @@ -182,7 +182,7 @@ ROLLBACK; connection stop_purge; COMMIT; connection default; -InnoDB 2 transactions not purged +InnoDB 1 transactions not purged SET DEBUG_SYNC='now SIGNAL logged'; connection ddl; connection default; diff --git a/mysql-test/suite/innodb/t/instant_alter_debug.test b/mysql-test/suite/innodb/t/instant_alter_debug.test index 917226a6c26..11d6961f918 100644 --- a/mysql-test/suite/innodb/t/instant_alter_debug.test +++ b/mysql-test/suite/innodb/t/instant_alter_debug.test @@ -200,7 +200,7 @@ COMMIT; connection default; # Wait for purge to empty the table. -let $wait_all_purged=2; +let $wait_all_purged=1; --source include/wait_all_purged.inc let $wait_all_purged=0; diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 38895883607..36dbde99c68 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -959,11 +959,17 @@ private: /** Commit the transaction in a mini-transaction. @param mtr mini-transaction (if there are any persistent modifications) */ void commit_low(mtr_t *mtr= nullptr); + /** Commit an empty transaction. + @param mtr mini-transaction */ + void commit_empty(mtr_t *mtr); + /** Assign the transaction its history serialisation number and write the + UNDO log to the assigned rollback segment. + @param mtr mini-transaction */ + inline void write_serialisation_history(mtr_t *mtr); public: /** Commit the transaction. */ void commit(); - /** Try to drop a persistent table. 
@param table persistent table @param fk whether to drop FOREIGN KEY metadata diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 9c464901e14..b1960e58924 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -307,11 +307,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) undo= nullptr; - /* After the purge thread has been given permission to exit, - we may roll back transactions (trx->undo_no==0) - in THD::cleanup() invoked from unlink_thd() in fast shutdown, - or in trx_rollback_recovered() in slow shutdown. - + /* Before any transaction-generating background threads or the purge have been started, we can start transactions in row_merge_drop_temp_indexes(), and roll back recovered transactions. @@ -323,12 +319,10 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) During fast shutdown, we may also continue to execute user transactions. */ - ut_ad(srv_undo_sources || trx->undo_no == 0 || + ut_ad(srv_undo_sources || srv_fast_shutdown || (!purge_sys.enabled() && (srv_is_being_started || - trx_rollback_is_active || - srv_force_recovery >= SRV_FORCE_NO_BACKGROUND)) || - srv_fast_shutdown); + srv_force_recovery >= SRV_FORCE_NO_BACKGROUND))); #ifdef WITH_WSREP if (wsrep_is_wsrep_xid(&trx->xid)) diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index ebffd87647d..75bbd9889c1 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -968,15 +968,150 @@ trx_start_low( ut_a(trx->error_state == DB_SUCCESS); } +/** Release an empty undo log that was associated with a transaction. 
*/ +ATTRIBUTE_COLD +void trx_t::commit_empty(mtr_t *mtr) +{ + trx_rseg_t *rseg= rsegs.m_redo.rseg; + trx_undo_t *&undo= rsegs.m_redo.undo; + + ut_ad(undo->state == TRX_UNDO_ACTIVE || undo->state == TRX_UNDO_PREPARED); + ut_ad(undo->size == 1); + + if (buf_block_t *u= + buf_page_get(page_id_t(rseg->space->id, undo->hdr_page_no), 0, + RW_X_LATCH, mtr)) + { + ut_d(const uint16_t state= + mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame)); + ut_ad(state == undo->state || state == TRX_UNDO_ACTIVE); + static_assert(TRX_UNDO_PAGE_START + 2 == TRX_UNDO_PAGE_FREE, + "compatibility"); + ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + u->page.frame, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + u->page.frame, 2)); + ut_ad(mach_read_from_4(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + + FIL_ADDR_PAGE + u->page.frame) == FIL_NULL); + ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + + FIL_ADDR_BYTE + u->page.frame) == 0); + ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_PREV + + u->page.frame, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + FLST_NEXT + + u->page.frame, FIL_ADDR_SIZE)); + + /* Delete the last undo log header, which must be for this transaction. + + An undo segment can be reused (TRX_UNDO_CACHED) only if it + comprises of one page and that single page contains enough space + for the undo log header of a subsequent transaction. See + trx_purge_add_undo_to_history(), which is executed when committing + a nonempty transaction. + + If we simply changed the undo page state to TRX_UNDO_CACHED, + then trx_undo_reuse_cached() could run out of space. We will + release the space consumed by our empty undo log to avoid that. */ + for (byte *last= &u->page.frame[TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE], + *prev= nullptr;;) + { + /* TRX_UNDO_PREV_LOG is only being read in debug assertions, and + written in trx_undo_header_create(). 
To remain compatible with + possibly corrupted old data files, we will not read the field + TRX_UNDO_PREV_LOG but instead rely on TRX_UNDO_NEXT_LOG. */ + ut_ad(mach_read_from_2(TRX_UNDO_PREV_LOG + last) == + (reinterpret_cast<size_t>(prev) & (srv_page_size - 1))); + + if (uint16_t next= mach_read_from_2(TRX_UNDO_NEXT_LOG + last)) + { + ut_ad(ulint{next} + TRX_UNDO_LOG_XA_HDR_SIZE < srv_page_size - 100); + ut_ad(&u->page.frame[next] > last); + ut_ad(mach_read_from_2(TRX_UNDO_LOG_START + last) <= next); + prev= last; + last= &u->page.frame[next]; + continue; + } + + ut_ad(mach_read_from_8(TRX_UNDO_TRX_ID + last) == id); + ut_ad(!mach_read_from_8(TRX_UNDO_TRX_NO + last)); + ut_ad(!memcmp(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + u->page.frame, + TRX_UNDO_LOG_START + last, 2)); + + if (prev) + { + mtr->memcpy(*u, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + + u->page.frame, prev + TRX_UNDO_LOG_START, 2); + const ulint free= page_offset(last); + mtr->write<2>(*u, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + + u->page.frame, free); + mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame, + TRX_UNDO_CACHED); + mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + u->page.frame, + page_offset(prev)); + mtr->write<2>(*u, prev + TRX_UNDO_NEXT_LOG, 0U); + mtr->memset(u, free, srv_page_size - FIL_PAGE_DATA_END - free, 0); + + /* We may have updated PAGE_MAX_TRX_ID on secondary index pages + to this->id. Ensure that trx_sys.m_max_trx_id will be recovered + correctly, even though we removed our undo log record along + with the TRX_UNDO_TRX_ID above. */ + + /* Below, we are acquiring rseg_header->page.lock after + u->page.lock (the opposite of trx_purge_add_undo_to_history()). + This is fine, because both functions are holding exclusive + rseg->latch. 
*/ + + if (mach_read_from_8(prev + TRX_UNDO_TRX_NO) >= id); + else if (buf_block_t *rseg_header= rseg->get(mtr, nullptr)) + { + byte *m= TRX_RSEG + TRX_RSEG_MAX_TRX_ID + rseg_header->page.frame; + + do + { + if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rseg_header->page.frame))) + /* This must have been upgraded from before MariaDB 10.3.5. */ + trx_rseg_format_upgrade(rseg_header, mtr); + else if (mach_read_from_8(m) >= id) + continue; + mtr->write<8>(*rseg_header, m, id); + } + while (0); + } + } + else + /* Our undo log header was right after the undo log segment header. + This page should have been created by trx_undo_create(), not + returned by trx_undo_reuse_cached(). + + We retain the dummy empty log in order to remain compatible with + trx_undo_mem_create_at_db_start(). This page will remain available + to trx_undo_reuse_cached(), and it will eventually be freed by + trx_purge_truncate_rseg_history(). */ + mtr->write<2>(*u, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + u->page.frame, + TRX_UNDO_CACHED); + break; + } + } + else + ut_ad("undo log page was not found" == 0); + + UT_LIST_REMOVE(rseg->undo_list, undo); + UT_LIST_ADD_FIRST(rseg->undo_cached, undo); + undo->state= TRX_UNDO_CACHED; + undo= nullptr; + + /* We must assign an "end" identifier even though we are not going + to persistently write it anywhere, to make sure that the purge of + history will not be stuck. */ + trx_sys.assign_new_trx_no(this); +} + /** Assign the transaction its history serialisation number and write the UNDO log to the assigned rollback segment. 
-@param trx persistent transaction @param mtr mini-transaction */ -static void trx_write_serialisation_history(trx_t *trx, mtr_t *mtr) +inline void trx_t::write_serialisation_history(mtr_t *mtr) { - ut_ad(!trx->read_only); - trx_rseg_t *rseg= trx->rsegs.m_redo.rseg; - trx_undo_t *&undo= trx->rsegs.m_redo.undo; + ut_ad(!read_only); + trx_rseg_t *rseg= rsegs.m_redo.rseg; + trx_undo_t *&undo= rsegs.m_redo.undo; if (UNIV_LIKELY(undo != nullptr)) { MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); @@ -988,26 +1123,32 @@ static void trx_write_serialisation_history(trx_t *trx, mtr_t *mtr) ut_ad(undo->rseg == rseg); /* Assign the transaction serialisation number and add any undo log to the purge queue. */ - if (rseg->last_page_no == FIL_NULL) + if (UNIV_UNLIKELY(!undo_no)) + { + /* The transaction was rolled back. */ + commit_empty(mtr); + goto done; + } + else if (rseg->last_page_no == FIL_NULL) { mysql_mutex_lock(&purge_sys.pq_mutex); - trx_sys.assign_new_trx_no(trx); - const trx_id_t end{trx->rw_trx_hash_element->no}; - /* If the rollback segment is not empty, trx->no cannot be less - than any trx_t::no already in rseg. User threads only produce - events when a rollback segment is empty. */ + trx_sys.assign_new_trx_no(this); + const trx_id_t end{rw_trx_hash_element->no}; + /* end cannot be less than anything in rseg. User threads only + produce events when a rollback segment is empty. */ purge_sys.purge_queue.push(TrxUndoRsegs{end, *rseg}); mysql_mutex_unlock(&purge_sys.pq_mutex); rseg->last_page_no= undo->hdr_page_no; rseg->set_last_commit(undo->hdr_offset, end); } else - trx_sys.assign_new_trx_no(trx); + trx_sys.assign_new_trx_no(this); UT_LIST_REMOVE(rseg->undo_list, undo); /* Change the undo log segment state from TRX_UNDO_ACTIVE, to define the transaction as committed in the file based domain, at mtr->commit_lsn() obtained in mtr->commit() below. 
*/ - trx_purge_add_undo_to_history(trx, undo, mtr); + trx_purge_add_undo_to_history(this, undo, mtr); + done: rseg->release(); rseg->latch.wr_unlock(); } @@ -1218,7 +1359,7 @@ ATTRIBUTE_NOINLINE static void trx_commit_cleanup(trx_undo_t *&undo) TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) { - /* We already detached from rseg in trx_write_serialisation_history() */ + /* We already detached from rseg in write_serialisation_history() */ ut_ad(!rsegs.m_redo.undo); read_view.close(); @@ -1409,7 +1550,7 @@ TRANSACTIONAL_TARGET void trx_t::commit_low(mtr_t *mtr) different rollback segments. However, if a transaction T2 is able to see modifications made by a transaction T1, T2 will always get a bigger transaction number and a bigger commit lsn than T1. */ - trx_write_serialisation_history(this, mtr); + write_serialisation_history(mtr); } else if (trx_rseg_t *rseg= rsegs.m_redo.rseg) { diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 8354d04edc6..657a7c48ff1 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -497,8 +497,7 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id, ut_ad(slot_no < TRX_RSEG_N_SLOTS); - *err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, - mtr); + *err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, mtr); if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { return NULL; } @@ -569,6 +568,7 @@ static uint16_t trx_undo_header_create(buf_block_t *undo_page, trx_id_t trx_id, start, 2); uint16_t prev_log= mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + undo_page->page.frame); + ut_ad(prev_log < free); alignas(4) byte buf[4]; mach_write_to_2(buf, TRX_UNDO_ACTIVE); mach_write_to_2(buf + 2, free); @@ -1022,7 +1022,6 @@ corrupted_type: case TRX_UNDO_ACTIVE: case TRX_UNDO_PREPARED: if (UNIV_LIKELY(type != 1)) { - trx_no = trx_id + 1; break; } sql_print_error("InnoDB: upgrade from older version than"