From 3c09f148f362a587ac3267c31fd17da5f71a0b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 7 Jul 2017 13:08:16 +0300 Subject: [PATCH] MDEV-12288 Reset DB_TRX_ID when the history is removed, to speed up MVCC Let InnoDB purge reset DB_TRX_ID,DB_ROLL_PTR when the history is removed. [TODO: It appears that the resetting is not taking place as often as it could be. We should test that a simple INSERT eventually causes row_purge_reset_trx_id() to be invoked unless DROP TABLE is invoked soon enough.] The InnoDB clustered index record system columns DB_TRX_ID,DB_ROLL_PTR are used by multi-versioning. After the history is no longer needed, these columns can safely be reset to 0 and 1<<55 (to indicate a fresh insert). When a reader sees 0 in the DB_TRX_ID column, it can instantly determine that the record is present in the read view. There is no need to acquire the transaction system mutex to check if the transaction exists, because writes can never be conducted by a transaction whose ID is 0. The persistent InnoDB undo log used to be split into two parts: insert_undo and update_undo. The insert_undo log was discarded at transaction commit or rollback, and the update_undo log was processed by the purge subsystem. As part of this change, we will only generate a single undo log for new transactions, and the purge subsystem will reset the DB_TRX_ID whenever a clustered index record is touched. That is, all persistent undo logs will be preserved at transaction commit or rollback, to be removed by purge. The InnoDB redo log format is changed in two ways: We remove the redo log record type MLOG_UNDO_HDR_REUSE, and we introduce the MLOG_ZIP_WRITE_TRX_ID record for updating the DB_TRX_ID,DB_ROLL_PTR in a ROW_FORMAT=COMPRESSED table. This is also changing the format of persistent InnoDB data files: undo log and clustered index leaf page records. It will still be possible via import and export to exchange data files with earlier versions of MariaDB. 
The change to clustered index leaf page records is simple: we allow DB_TRX_ID to be 0. When it comes to the undo log, we must be able to upgrade from earlier MariaDB versions after a clean shutdown (no redo log to apply). While it would be nice to perform a slow shutdown (innodb_fast_shutdown=0) before an upgrade, to empty the undo logs, we cannot assume that this has been done. So, a separate insert_undo log may exist for recovered uncommitted transactions. These transactions may be automatically rolled back, or they may be in XA PREPARE state, in which case InnoDB will preserve the transaction until an explicit XA COMMIT or XA ROLLBACK. Upgrade has been tested by starting up MariaDB 10.2 with ./mysql-test-run --manual-gdb innodb.read_only_recovery and then starting up this patched server with and without --innodb-read-only. trx_undo_ptr_t::undo: Renamed from update_undo. trx_undo_ptr_t::old_insert: Renamed from insert_undo. trx_rseg_t::undo_list: Renamed from update_undo_list. trx_rseg_t::undo_cached: Merged from update_undo_cached and insert_undo_cached. trx_rseg_t::old_insert_list: Renamed from insert_undo_list. row_purge_reset_trx_id(): New function to reset the columns. This will be called for all undo processing in purge that does not remove the clustered index record. trx_undo_update_rec_get_update(): Allow trx_id=0 when copying the old DB_TRX_ID of the record to the undo log. ReadView::changes_visible(): Allow id==0. (Return true for it. This is what speeds up the MVCC.) row_vers_impl_x_locked_low(), row_vers_build_for_semi_consistent_read(): Implement a fast path for DB_TRX_ID=0. Always initialize the TRX_UNDO_PAGE_TYPE to 0. Remove undo->type. MLOG_UNDO_HDR_REUSE: Remove. This changes the redo log format! innobase_start_or_create_for_mysql(): Set srv_undo_sources before starting any transactions. 
The parsing of the MLOG_ZIP_WRITE_TRX_ID record was successfully tested by running the following: ./mtr --parallel=auto --mysqld=--debug=d,ib_log innodb_zip.bug56680 grep MLOG_ZIP_WRITE_TRX_ID var/*/log/mysqld.1.err --- mysql-test/suite/innodb_zip/r/bug56680.result | 3 + mysql-test/suite/innodb_zip/t/bug56680.test | 16 +- mysql-test/suite/innodb_zip/t/recover.test | 3 +- storage/innobase/buf/buf0buf.cc | 14 +- storage/innobase/include/mtr0types.h | 9 +- storage/innobase/include/page0zip.h | 41 +- storage/innobase/include/read0types.h | 2 - storage/innobase/include/trx0rseg.h | 20 +- storage/innobase/include/trx0trx.h | 19 +- storage/innobase/include/trx0undo.h | 23 +- storage/innobase/log/log0log.cc | 2 +- storage/innobase/log/log0recv.cc | 16 +- storage/innobase/page/page0zip.cc | 101 ++++- storage/innobase/row/row0import.cc | 7 +- storage/innobase/row/row0purge.cc | 124 ++++-- storage/innobase/row/row0trunc.cc | 7 +- storage/innobase/row/row0upd.cc | 3 +- storage/innobase/row/row0vers.cc | 17 + storage/innobase/srv/srv0start.cc | 33 +- storage/innobase/trx/trx0purge.cc | 16 +- storage/innobase/trx/trx0rec.cc | 41 +- storage/innobase/trx/trx0roll.cc | 52 ++- storage/innobase/trx/trx0rseg.cc | 28 +- storage/innobase/trx/trx0trx.cc | 130 +++--- storage/innobase/trx/trx0undo.cc | 396 +++++------------- 25 files changed, 517 insertions(+), 606 deletions(-) diff --git a/mysql-test/suite/innodb_zip/r/bug56680.result b/mysql-test/suite/innodb_zip/r/bug56680.result index 40660f435fb..02ec24c98ae 100644 --- a/mysql-test/suite/innodb_zip/r/bug56680.result +++ b/mysql-test/suite/innodb_zip/r/bug56680.result @@ -116,5 +116,8 @@ DF CHECK TABLE bug56680_2; Table Op Msg_type Msg_text test.bug56680_2 check status OK +CHECK TABLE bug56680_2; +Table Op Msg_type Msg_text +test.bug56680_2 check status OK DROP TABLE bug56680_2; DROP TABLE bug56680; diff --git a/mysql-test/suite/innodb_zip/t/bug56680.test b/mysql-test/suite/innodb_zip/t/bug56680.test index da37f6a28b4..f0dc21a1202 
100644 --- a/mysql-test/suite/innodb_zip/t/bug56680.test +++ b/mysql-test/suite/innodb_zip/t/bug56680.test @@ -1,20 +1,18 @@ #Want to skip this test from daily Valgrind execution --source include/no_valgrind_without_big.inc +# Embedded server tests do not support restarting. +--source include/not_embedded.inc # # Bug #56680 InnoDB may return wrong results from a case-insensitive index # -- source include/innodb_page_size_small.inc -- disable_query_log -SET @tx_isolation_orig = @@tx_isolation; -SET @innodb_file_per_table_orig = @@innodb_file_per_table; # The flag innodb_change_buffering_debug is only available in debug builds. # It instructs InnoDB to try to evict pages from the buffer pool when # change buffering is possible, so that the change buffer will be used # whenever possible. -- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; --- error 0,ER_UNKNOWN_SYSTEM_VARIABLE SET GLOBAL innodb_change_buffering_debug = 1; -- enable_query_log SET GLOBAL tx_isolation='REPEATABLE-READ'; @@ -129,11 +127,9 @@ INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1); SELECT HEX(b) FROM bug56680_2 LIMIT 3; CHECK TABLE bug56680_2; +--let $shutdown_timeout=1 +--source include/restart_mysqld.inc + +CHECK TABLE bug56680_2; DROP TABLE bug56680_2; DROP TABLE bug56680; - --- disable_query_log -SET GLOBAL tx_isolation = @tx_isolation_orig; -SET GLOBAL innodb_file_per_table = @innodb_file_per_table_orig; --- error 0, ER_UNKNOWN_SYSTEM_VARIABLE -SET GLOBAL innodb_change_buffering_debug = @innodb_change_buffering_debug_orig; diff --git a/mysql-test/suite/innodb_zip/t/recover.test b/mysql-test/suite/innodb_zip/t/recover.test index 0cfc2b672fb..f32ff3d3783 100644 --- a/mysql-test/suite/innodb_zip/t/recover.test +++ b/mysql-test/suite/innodb_zip/t/recover.test @@ -1,5 +1,4 @@ ---source include/have_innodb.inc ---source include/have_innodb_max_16k.inc +--source include/innodb_page_size_small.inc --source include/not_embedded.inc 
--disable_query_log diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 61006c8d89d..c5f5720cdea 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1284,17 +1284,6 @@ buf_page_print( read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); } - if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_INSERT) { - fprintf(stderr, - "InnoDB: Page may be an insert undo log page\n"); - } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_UPDATE) { - fprintf(stderr, - "InnoDB: Page may be an update undo log page\n"); - } - switch (fil_page_get_type(read_buf)) { index_id_t index_id; case FIL_PAGE_INDEX: @@ -1311,6 +1300,9 @@ buf_page_print( << " in table " << index->table->name; } break; + case FIL_PAGE_UNDO_LOG: + fputs("InnoDB: Page may be an undo log page\n", stderr); + break; case FIL_PAGE_INODE: fputs("InnoDB: Page may be an 'inode' page\n", stderr); break; diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 0725a5405a4..c30b5492808 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -112,9 +112,6 @@ enum mlog_id_t { /** discard an update undo log header */ MLOG_UNDO_HDR_DISCARD = 23, - /** reuse an insert undo log header */ - MLOG_UNDO_HDR_REUSE = 24, - /** create an undo log header */ MLOG_UNDO_HDR_CREATE = 25, @@ -235,8 +232,12 @@ enum mlog_id_t { redo log about individual pages */ MLOG_INDEX_LOAD = 61, + /** write DB_TRX_ID,DB_ROLL_PTR to a clustered index leaf page + of a ROW_FORMAT=COMPRESSED table */ + MLOG_ZIP_WRITE_TRX_ID = 62, + /** biggest value (used in assertions) */ - MLOG_BIGGEST_TYPE = MLOG_INDEX_LOAD, + MLOG_BIGGEST_TYPE = MLOG_ZIP_WRITE_TRX_ID, /** log record for writing/updating crypt data of a tablespace */ diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h index c1d5443d9e5..6e0c097bbaf 100644 --- 
a/storage/innobase/include/page0zip.h +++ b/storage/innobase/include/page0zip.h @@ -340,18 +340,39 @@ page_zip_write_node_ptr( ulint ptr, /*!< in: node pointer */ mtr_t* mtr); /*!< in: mini-transaction, or NULL */ -/**********************************************************************//** -Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ +/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record. +@param[in,out] page_zip compressed page +@param[in,out] rec record +@param[in] offsets rec_get_offsets(rec, index) +@param[in] trx_id_field field number of DB_TRX_ID (number of PK fields) +@param[in] trx_id DB_TRX_ID value (transaction identifier) +@param[in] roll_ptr DB_ROLL_PTR value (undo log pointer) +@param[in,out] mtr mini-transaction, or NULL to skip logging */ void page_zip_write_trx_id_and_roll_ptr( -/*===============================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in/out: record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint trx_id_col,/*!< in: column number of TRX_ID in rec */ - trx_id_t trx_id, /*!< in: transaction identifier */ - roll_ptr_t roll_ptr)/*!< in: roll_ptr */ - MY_ATTRIBUTE((nonnull)); + page_zip_des_t* page_zip, + byte* rec, + const ulint* offsets, + ulint trx_id_col, + trx_id_t trx_id, + roll_ptr_t roll_ptr, + mtr_t* mtr = NULL) + MY_ATTRIBUTE((nonnull(1,2,3))); + +/** Parse a MLOG_ZIP_WRITE_TRX_ID record. +@param[in] ptr redo log buffer +@param[in] end_ptr end of redo log buffer +@param[in,out] page uncompressed page +@param[in,out] page_zip compressed page +@return end of log record +@retval NULL if the log record is incomplete */ +byte* +page_zip_parse_write_trx_id( + byte* ptr, + byte* end_ptr, + page_t* page, + page_zip_des_t* page_zip) + MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); /**********************************************************************//** Write the "deleted" flag of a record on a compressed page. 
The flag must diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h index c83c7e04f11..8056dbf437f 100644 --- a/storage/innobase/include/read0types.h +++ b/storage/innobase/include/read0types.h @@ -163,8 +163,6 @@ public: const table_name_t& name) const MY_ATTRIBUTE((warn_unused_result)) { - ut_ad(id > 0); - if (id < m_up_limit_id || id == m_creator_trx_id) { return(true); diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 3078aa8faf1..f14dadd7999 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -162,20 +162,16 @@ struct trx_rseg_t { ulint curr_size; /*--------------------------------------------------------*/ - /* Fields for update undo logs */ - /** List of update undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list; + /* Fields for undo logs */ + /** List of undo logs */ + UT_LIST_BASE_NODE_T(trx_undo_t) undo_list; - /** List of update undo log segments cached for fast reuse */ - UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached; + /** List of undo log segments cached for fast reuse */ + UT_LIST_BASE_NODE_T(trx_undo_t) undo_cached; - /*--------------------------------------------------------*/ - /* Fields for insert undo logs */ - /** List of insert undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list; - - /** List of insert undo log segments cached for fast reuse */ - UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached; + /** List of recovered old insert_undo logs of incomplete + transactions (to roll back or XA COMMIT & purge) */ + UT_LIST_BASE_NODE_T(trx_undo_t) old_insert_list; /*--------------------------------------------------------*/ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index b2d4952318c..69bea016605 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -842,10 +842,12 @@ struct trx_undo_ptr_t { trx_rseg_t* rseg; /*!< rollback 
segment assigned to the transaction, or NULL if not assigned yet */ - trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or - NULL if no inserts performed yet */ - trx_undo_t* update_undo; /*!< pointer to the update undo log, or - NULL if no update performed yet */ + trx_undo_t* undo; /*!< pointer to the undo log, or + NULL if nothing logged yet */ + trx_undo_t* old_insert; /*!< pointer to recovered + insert undo log, or NULL if no + INSERT transactions were + recovered from old-format undo logs */ }; /** An instance of temporary rollback segment. */ @@ -1274,7 +1276,7 @@ struct trx_t { /** @return whether any persistent undo log has been generated */ bool has_logged_persistent() const { - return(rsegs.m_redo.insert_undo || rsegs.m_redo.update_undo); + return(rsegs.m_redo.undo); } /** @return whether any undo log has been generated */ @@ -1283,6 +1285,13 @@ struct trx_t { return(has_logged_persistent() || rsegs.m_noredo.undo); } + /** @return whether any undo log has been generated or + recovered */ + bool has_logged_or_recovered() const + { + return(has_logged() || rsegs.m_redo.old_insert); + } + /** @return rollback segment for modifying temporary tables */ trx_rseg_t* get_temp_rseg() { diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index cdf97b6a9cf..1302d3fd6df 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -257,18 +257,13 @@ A new undo log is created or a cached undo log reused. 
@param[in,out] trx transaction @param[in] rseg rollback segment @param[out] undo the undo log -@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE @retval DB_SUCCESS on success @retval DB_TOO_MANY_CONCURRENT_TRXS @retval DB_OUT_OF_FILE_SPACE @retval DB_READ_ONLY @retval DB_OUT_OF_MEMORY */ dberr_t -trx_undo_assign_undo( - trx_t* trx, - trx_rseg_t* rseg, - trx_undo_t** undo, - ulint type) +trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) MY_ATTRIBUTE((nonnull, warn_unused_result)); /******************************************************************//** Sets the state of the undo log segment at a transaction finish. @@ -281,7 +276,7 @@ trx_undo_set_state_at_finish( /** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK. @param[in,out] trx transaction -@param[in,out] undo insert_undo or update_undo log +@param[in,out] undo undo log @param[in] rollback false=XA PREPARE, true=XA ROLLBACK @param[in,out] mtr mini-transaction @return undo log segment header page, x-latched */ @@ -305,7 +300,7 @@ trx_undo_update_cleanup( x-latched */ mtr_t* mtr); /*!< in: mtr */ -/** Free an insert or temporary undo log after commit or rollback. +/** Free an old insert or temporary undo log after commit or rollback. The information is not needed after a commit or rollback, therefore the data can be discarded. @param[in,out] undo undo log @@ -343,8 +338,7 @@ trx_undo_parse_page_init( const byte* end_ptr,/*!< in: buffer end */ page_t* page, /*!< in: page or NULL */ mtr_t* mtr); /*!< in: mtr or NULL */ -/** Parse the redo log entry of an undo log page header create or reuse. -@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE +/** Parse the redo log entry of an undo log page header create. 
@param[in] ptr redo log record @param[in] end_ptr end of log buffer @param[in,out] page page frame or NULL @@ -352,7 +346,6 @@ trx_undo_parse_page_init( @return end of log record or NULL */ byte* trx_undo_parse_page_header( - mlog_id_t type, const byte* ptr, const byte* end_ptr, page_t* page, @@ -402,8 +395,6 @@ struct trx_undo_t { /*-----------------------------*/ ulint id; /*!< undo log slot number within the rollback segment */ - ulint type; /*!< TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ ulint state; /*!< state of the corresponding undo log segment */ ibool del_marks; /*!< relevant only in an update undo @@ -460,8 +451,8 @@ struct trx_undo_t { /*-------------------------------------------------------------*/ /** Transaction undo log page header offsets */ /* @{ */ -#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ +#define TRX_UNDO_PAGE_TYPE 0 /*!< unused; 0 (before MariaDB 10.3.1: + TRX_UNDO_INSERT or TRX_UNDO_UPDATE) */ #define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log records for the LATEST transaction start on this page (remember that @@ -559,7 +550,7 @@ page of an update undo log segment. */ #define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE) /* Note: the writing of the undo log old header is coded by a log record -MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the +MLOG_UNDO_HDR_CREATE. The appending of an XID to the header is logged separately. In this sense, the XID is not really a member of the undo log header. TODO: do not append the XID to the log header if XA is not needed by the user. The XID wastes about 150 bytes of space in every diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 9a61e2067a4..13ec675a9e5 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -68,7 +68,7 @@ c-function and its parameters are written to the log to reduce the size of the log. 
3a) You should not add parameters to these kind of functions - (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse()) + (e.g. trx_undo_header_create()) 3b) You should not add such functionality which either change working when compared with the old or are dependent on data diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 84612fd2923..5ac84c6619b 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1424,10 +1424,8 @@ parse_log: ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr); break; case MLOG_UNDO_HDR_CREATE: - case MLOG_UNDO_HDR_REUSE: ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); - ptr = trx_undo_parse_page_header(type, ptr, end_ptr, - page, mtr); + ptr = trx_undo_parse_page_header(ptr, end_ptr, page, mtr); break; case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK: ut_ad(!page || fil_page_type_is_index(page_type)); @@ -1496,6 +1494,12 @@ parse_log: ptr, end_ptr, page, page_zip, index); } break; + case MLOG_ZIP_WRITE_TRX_ID: + /* This must be a clustered index leaf page. 
*/ + ut_ad(!page || page_type == FIL_PAGE_INDEX); + ptr = page_zip_parse_write_trx_id(ptr, end_ptr, + page, page_zip); + break; case MLOG_FILE_WRITE_CRYPT_DATA: dberr_t err; ptr = const_cast(fil_parse_write_crypt_data(ptr, end_ptr, block, &err)); @@ -3654,9 +3658,6 @@ get_mlog_string(mlog_id_t type) case MLOG_UNDO_HDR_DISCARD: return("MLOG_UNDO_HDR_DISCARD"); - case MLOG_UNDO_HDR_REUSE: - return("MLOG_UNDO_HDR_REUSE"); - case MLOG_UNDO_HDR_CREATE: return("MLOG_UNDO_HDR_CREATE"); @@ -3737,6 +3738,9 @@ get_mlog_string(mlog_id_t type) case MLOG_ZIP_PAGE_REORGANIZE: return("MLOG_ZIP_PAGE_REORGANIZE"); + case MLOG_ZIP_WRITE_TRX_ID: + return("MLOG_ZIP_WRITE_TRX_ID"); + case MLOG_FILE_RENAME2: return("MLOG_FILE_RENAME2"); diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index ffb8808d6b4..b367a32a8d7 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -4140,17 +4140,23 @@ page_zip_write_node_ptr( } } -/**********************************************************************//** -Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ +/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record. 
+@param[in,out] page_zip compressed page +@param[in,out] rec record +@param[in] offsets rec_get_offsets(rec, index) +@param[in] trx_id_field field number of DB_TRX_ID (number of PK fields) +@param[in] trx_id DB_TRX_ID value (transaction identifier) +@param[in] roll_ptr DB_ROLL_PTR value (undo log pointer) +@param[in,out] mtr mini-transaction, or NULL to skip logging */ void page_zip_write_trx_id_and_roll_ptr( -/*===============================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in/out: record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint trx_id_col,/*!< in: column number of TRX_ID in rec */ - trx_id_t trx_id, /*!< in: transaction identifier */ - roll_ptr_t roll_ptr)/*!< in: roll_ptr */ + page_zip_des_t* page_zip, + byte* rec, + const ulint* offsets, + ulint trx_id_col, + trx_id_t trx_id, + roll_ptr_t roll_ptr, + mtr_t* mtr) { byte* field; byte* storage; @@ -4202,6 +4208,83 @@ page_zip_write_trx_id_and_roll_ptr( UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), rec_offs_extra_size(offsets)); UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); + + if (mtr) { + byte* log_ptr = mlog_open( + mtr, 11 + 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + if (UNIV_UNLIKELY(!log_ptr)) { + return; + } + + log_ptr = mlog_write_initial_log_record_fast( + (byte*) field, MLOG_ZIP_WRITE_TRX_ID, log_ptr, mtr); + mach_write_to_2(log_ptr, page_offset(field)); + log_ptr += 2; + mach_write_to_2(log_ptr, storage - page_zip->data); + log_ptr += 2; + memcpy(log_ptr, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + log_ptr += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; + mlog_close(mtr, log_ptr); + } +} + +/** Parse a MLOG_ZIP_WRITE_TRX_ID record. 
+@param[in] ptr redo log buffer +@param[in] end_ptr end of redo log buffer +@param[in,out] page uncompressed page +@param[in,out] page_zip compressed page +@return end of log record +@retval NULL if the log record is incomplete */ +byte* +page_zip_parse_write_trx_id( + byte* ptr, + byte* end_ptr, + page_t* page, + page_zip_des_t* page_zip) +{ + byte* const end = 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + ptr; + + if (UNIV_UNLIKELY(end_ptr < end)) { + return(NULL); + } + + uint offset = mach_read_from_2(ptr); + uint z_offset = mach_read_from_2(ptr + 2); + + if (offset < PAGE_ZIP_START + || offset >= UNIV_PAGE_SIZE + || z_offset >= UNIV_PAGE_SIZE) { +corrupt: + recv_sys->found_corrupt_log = TRUE; + + return(NULL); + } + + if (page) { + if (!page_zip || !page_is_leaf(page)) { + goto corrupt; + } + +#ifdef UNIV_ZIP_DEBUG + ut_a(page_zip_validate(page_zip, page, NULL)); +#endif /* UNIV_ZIP_DEBUG */ + + byte* field = page + offset; + byte* storage = page_zip->data + z_offset; + + if (storage >= page_zip_dir_start(page_zip)) { + goto corrupt; + } + + memcpy(field, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + memcpy(storage, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + +#ifdef UNIV_ZIP_DEBUG + ut_a(page_zip_validate(page_zip, page, NULL)); +#endif /* UNIV_ZIP_DEBUG */ + } + + return end; } /**********************************************************************//** diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 45ca1582e49..0b538660c9b 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -3380,9 +3380,8 @@ row_import_for_mysql( mutex_enter(&trx->undo_mutex); /* TODO: Do not write any undo log for the IMPORT cleanup. 
*/ - trx_undo_t** pundo = &trx->rsegs.m_redo.update_undo; - err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, pundo, - TRX_UNDO_UPDATE); + err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, + &trx->rsegs.m_redo.undo); mutex_exit(&trx->undo_mutex); @@ -3393,7 +3392,7 @@ row_import_for_mysql( return(row_import_cleanup(prebuilt, trx, err)); - } else if (trx->rsegs.m_redo.update_undo == 0) { + } else if (trx->rsegs.m_redo.undo == 0) { err = DB_TOO_MANY_CONCURRENT_TRXS; return(row_import_cleanup(prebuilt, trx, err)); diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index cecd127e71e..ac8ae0002e6 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -663,6 +663,75 @@ row_purge_del_mark( return(row_purge_remove_clust_if_poss(node)); } +/** Reset DB_TRX_ID, DB_ROLL_PTR of a clustered index record +whose old history can no longer be observed. +@param[in,out] node purge node +@param[in,out] mtr mini-transaction (will be started and committed) */ +static +void +row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr) +{ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)); + /* Reset DB_TRX_ID, DB_ROLL_PTR for old records. */ + mtr->start(); + + if (row_purge_reposition_pcur(BTR_MODIFY_LEAF, node, mtr)) { + dict_index_t* index = dict_table_get_first_index( + node->table); + ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; + rec_t* rec = btr_pcur_get_rec(&node->pcur); + mem_heap_t* heap = NULL; + /* Reserve enough offsets for the PRIMARY KEY and 2 columns + so that we can access DB_TRX_ID, DB_ROLL_PTR. 
*/ + ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; + rec_offs_init(offsets_); + ulint* offsets = rec_get_offsets( + rec, index, offsets_, trx_id_pos + 2, &heap); + ut_ad(heap == NULL); + + ut_ad(dict_index_get_nth_field(index, trx_id_pos) + ->col->mtype == DATA_SYS); + ut_ad(dict_index_get_nth_field(index, trx_id_pos) + ->col->prtype == (DATA_TRX_ID | DATA_NOT_NULL)); + ut_ad(dict_index_get_nth_field(index, trx_id_pos + 1) + ->col->mtype == DATA_SYS); + ut_ad(dict_index_get_nth_field(index, trx_id_pos + 1) + ->col->prtype == (DATA_ROLL_PTR | DATA_NOT_NULL)); + + /* Only update the record if DB_ROLL_PTR matches (the + record has not been modified after this transaction + became purgeable) */ + if (node->roll_ptr + == row_get_rec_roll_ptr(rec, index, offsets)) { + ut_ad(!rec_get_deleted_flag(rec, + rec_offs_comp(offsets))); + mtr->set_named_space(index->space); + if (page_zip_des_t* page_zip + = buf_block_get_page_zip( + btr_pcur_get_block(&node->pcur))) { + page_zip_write_trx_id_and_roll_ptr( + page_zip, rec, offsets, trx_id_pos, + 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS, + mtr); + } else { + ulint len; + byte* ptr = rec_get_nth_field( + rec, offsets, trx_id_pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + memset(ptr, 0, DATA_TRX_ID_LEN + + DATA_ROLL_PTR_LEN); + ptr[DATA_TRX_ID_LEN] = 1U + << (ROLL_PTR_INSERT_FLAG_POS - CHAR_BIT + * (DATA_ROLL_PTR_LEN - 1)); + mlog_log_string(ptr, DATA_TRX_ID_LEN + + DATA_ROLL_PTR_LEN, mtr); + } + } + } + + mtr->commit(); +} + /***********************************************************//** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. 
*/ @@ -713,6 +782,8 @@ row_purge_upd_exist_or_extern_func( mem_heap_free(heap); skip_secondaries: + mtr_t mtr; + dict_index_t* index = dict_table_get_first_index(node->table); /* Free possible externally stored fields */ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) { @@ -724,12 +795,10 @@ skip_secondaries: buf_block_t* block; ulint internal_offset; byte* data_field; - dict_index_t* index; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; - mtr_t mtr; /* We use the fact that new_val points to undo_rec and get thus the offset of @@ -759,7 +828,6 @@ skip_secondaries: /* We have to acquire an SX-latch to the clustered index tree (exclude other tree changes) */ - index = dict_table_get_first_index(node->table); mtr_sx_lock(dict_index_get_lock(index), &mtr); mtr.set_named_space(index->space); @@ -794,6 +862,8 @@ skip_secondaries: mtr_commit(&mtr); } } + + row_purge_reset_trx_id(node, &mtr); } #ifdef UNIV_DEBUG @@ -819,10 +889,8 @@ row_purge_parse_undo_rec( { dict_index_t* clust_index; byte* ptr; - trx_t* trx; undo_no_t undo_no; table_id_t table_id; - trx_id_t trx_id; roll_ptr_t roll_ptr; ulint info_bits; ulint type; @@ -836,16 +904,22 @@ row_purge_parse_undo_rec( node->rec_type = type; - if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) { - - return(false); + switch (type) { + case TRX_UNDO_INSERT_REC: + break; + default: +#ifdef UNIV_DEBUG + ut_ad(0); + return false; + case TRX_UNDO_UPD_DEL_REC: + case TRX_UNDO_UPD_EXIST_REC: + case TRX_UNDO_DEL_MARK_REC: +#endif /* UNIV_DEBUG */ + ptr = trx_undo_update_rec_get_sys_cols(ptr, &node->trx_id, + &roll_ptr, &info_bits); + break; } - ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); - node->table = NULL; - node->trx_id = trx_id; - /* Prevent DROP TABLE etc. 
from running when we are doing the purge for this row */ @@ -868,7 +942,8 @@ try_again: goto err_exit; } - if (node->table->n_v_cols && !node->table->vc_templ + if (type != TRX_UNDO_INSERT_REC + && node->table->n_v_cols && !node->table->vc_templ && dict_table_has_indexed_v_cols(node->table)) { /* Need server fully up for virtual column computation */ if (!mysqld_server_started) { @@ -893,28 +968,23 @@ try_again: /* The table was corrupt in the data dictionary. dict_set_corrupted() works on an index, and we do not have an index to call it with. */ -close_exit: dict_table_close(node->table, FALSE, FALSE); err_exit: rw_lock_s_unlock(dict_operation_lock); return(false); } - if (type == TRX_UNDO_UPD_EXIST_REC - && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) - && !*updated_extern) { - - /* Purge requires no changes to indexes: we may return */ - goto close_exit; - } - ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), node->heap); - trx = thr_get_trx(thr); + if (type == TRX_UNDO_INSERT_REC) { + return(true); + } - ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, - roll_ptr, info_bits, trx, + ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, + node->trx_id, + roll_ptr, info_bits, + thr_get_trx(thr), node->heap, &(node->update)); /* Read to the partial row the fields that occur in indexes */ @@ -967,6 +1037,8 @@ row_purge_record_func( break; default: if (!updated_extern) { + mtr_t mtr; + row_purge_reset_trx_id(node, &mtr); break; } /* fall through */ diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc index 5724fad801f..a3658a0bbc4 100644 --- a/storage/innobase/row/row0trunc.cc +++ b/storage/innobase/row/row0trunc.cc @@ -1841,11 +1841,8 @@ row_truncate_table_for_mysql( till some point. Associate rollback segment to record undo log. 
*/ if (!dict_table_is_temporary(table)) { mutex_enter(&trx->undo_mutex); - - trx_undo_t** pundo = &trx->rsegs.m_redo.update_undo; - err = trx_undo_assign_undo( - trx, trx->rsegs.m_redo.rseg, pundo, TRX_UNDO_UPDATE); - + err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, + &trx->rsegs.m_redo.undo); mutex_exit(&trx->undo_mutex); DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log", diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index b639fd583c4..deee22be7b7 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2582,8 +2582,7 @@ row_upd_clust_rec_by_insert_inherit_func( data[BTR_EXTERN_LEN] &= ~BTR_EXTERN_OWNER_FLAG; data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG; /* The BTR_EXTERN_INHERITED_FLAG only matters in - rollback of a fresh insert (insert_undo log). - Purge (operating on update_undo log) will always free + rollback of a fresh insert. Purge will always free the extern fields of a delete-marked row. */ inherit = true; diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index de33c7c4d1b..26f6739c3ae 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -97,12 +97,25 @@ row_vers_impl_x_locked_low( ut_ad(rec_offs_validate(rec, index, offsets)); + if (ulint trx_id_offset = clust_index->trx_id_offset) { + trx_id = mach_read_from_6(clust_rec + trx_id_offset); + if (trx_id == 0) { + /* The transaction history was already purged. */ + DBUG_RETURN(0); + } + } + heap = mem_heap_create(1024); clust_offsets = rec_get_offsets( clust_rec, clust_index, NULL, ULINT_UNDEFINED, &heap); trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets); + if (trx_id == 0) { + /* The transaction history was already purged. 
*/ + mem_heap_free(heap); + DBUG_RETURN(0); + } corrupt = FALSE; trx_t* trx = trx_rw_is_active(trx_id, &corrupt, true); @@ -1262,6 +1275,10 @@ row_vers_build_for_semi_consistent_read( rec_trx_id = version_trx_id; } + if (!version_trx_id) { + goto committed_version_trx; + } + trx_sys_mutex_enter(); version_trx = trx_get_rw_trx_by_id(version_trx_id); /* Because version_trx is a read-write transaction, diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 95e35749e3c..529bdd46c49 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2543,6 +2543,20 @@ files_checked: thread_started[4 + SRV_MAX_N_IO_THREADS] = true; srv_start_state |= SRV_START_STATE_LOCK_SYS | SRV_START_STATE_MONITOR; + + ut_a(trx_purge_state() == PURGE_STATE_INIT); + + if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { + srv_undo_sources = true; + /* Create the dict stats gathering thread */ + srv_dict_stats_thread_active = true; + dict_stats_thread_handle = os_thread_create( + dict_stats_thread, NULL, NULL); + + /* Create the thread that will optimize the + FULLTEXT search index subsystem. 
*/ + fts_optimize_init(); + } } /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */ @@ -2576,16 +2590,7 @@ files_checked: } trx_temp_rseg_create(); - } - srv_is_being_started = false; - - ut_a(trx_purge_state() == PURGE_STATE_INIT); - - /* Create the master thread which does purge and other utility - operations */ - - if (!srv_read_only_mode) { thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create( srv_master_thread, NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS)); @@ -2593,16 +2598,10 @@ files_checked: srv_start_state_set(SRV_START_STATE_MASTER); } + srv_is_being_started = false; + if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { - srv_undo_sources = true; - /* Create the dict stats gathering thread */ - srv_dict_stats_thread_active = true; - dict_stats_thread_handle = os_thread_create( - dict_stats_thread, NULL, NULL); - - /* Create the thread that will optimize the FTS sub-system. */ - fts_optimize_init(); thread_handles[5 + SRV_MAX_N_IO_THREADS] = os_thread_create( srv_purge_coordinator_thread, diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 317087173c5..c046c8b7b52 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -248,7 +248,7 @@ trx_purge_add_update_undo_to_history( x-latched */ mtr_t* mtr) /*!< in: mtr */ { - trx_undo_t* undo = trx->rsegs.m_redo.update_undo; + trx_undo_t* undo = trx->rsegs.m_redo.undo; trx_rseg_t* rseg = undo->rseg; trx_rsegf_t* rseg_header = trx_rsegf_get( rseg->space, rseg->page_no, mtr); @@ -954,19 +954,7 @@ trx_purge_initiate_truncate( ulint cached_undo_size = 0; for (trx_undo_t* undo = - UT_LIST_GET_FIRST(rseg->update_undo_cached); - undo != NULL && all_free; - undo = UT_LIST_GET_NEXT(undo_list, undo)) { - - if (limit->trx_no < undo->trx_id) { - all_free = false; - } else { - cached_undo_size += undo->size; - } - } - - for (trx_undo_t* undo = - 
UT_LIST_GET_FIRST(rseg->insert_undo_cached); + UT_LIST_GET_FIRST(rseg->undo_cached); undo != NULL && all_free; undo = UT_LIST_GET_NEXT(undo_list, undo)) { diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index a60e42dcea4..d9e506d3eb3 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -469,8 +469,8 @@ trx_undo_page_report_insert( ulint i; ut_ad(dict_index_is_clust(index)); - ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); + ut_ad(*reinterpret_cast(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + + undo_page) == 0); first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE); @@ -875,13 +875,10 @@ trx_undo_page_report_modify( ut_a(dict_index_is_clust(index)); ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE - + undo_page) == TRX_UNDO_UPDATE - || (dict_table_is_temporary(table) - && mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE - + undo_page) == TRX_UNDO_INSERT)); - trx_undo_t* update_undo = dict_table_is_temporary(table) - ? NULL : trx->rsegs.m_redo.update_undo; + ut_ad(*reinterpret_cast(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + + undo_page) == 0); + trx_undo_t* undo = dict_table_is_temporary(table) + ? 
NULL : trx->rsegs.m_redo.undo; first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE); @@ -1109,8 +1106,8 @@ trx_undo_page_report_modify( /* Notify purge that it eventually has to free the old externally stored field */ - if (update_undo) { - update_undo->del_marks = TRUE; + if (undo) { + undo->del_marks = TRUE; } *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN; @@ -1180,8 +1177,8 @@ trx_undo_page_report_modify( double mbr[SPDIMS * 2]; mem_heap_t* row_heap = NULL; - if (update_undo) { - update_undo->del_marks = TRUE; + if (undo) { + undo->del_marks = TRUE; } if (trx_undo_left(undo_page, ptr) < 5) { @@ -1485,7 +1482,7 @@ trx_undo_update_rec_get_update( buf = static_cast(mem_heap_alloc(heap, DATA_TRX_ID_LEN)); - trx_write_trx_id(buf, trx_id); + mach_write_to_6(buf, trx_id); upd_field_set_field_no(upd_field, dict_index_get_sys_col_pos(index, DATA_TRX_ID), @@ -1901,23 +1898,13 @@ trx_undo_report_row_operation( not listed there. */ trx->mod_tables.insert(index->table); - pundo = !rec - ? &trx->rsegs.m_redo.insert_undo - : &trx->rsegs.m_redo.update_undo; + pundo = &trx->rsegs.m_redo.undo; rseg = trx->rsegs.m_redo.rseg; } mutex_enter(&trx->undo_mutex); - dberr_t err; - - if (*pundo) { - err = DB_SUCCESS; - } else if (!rec || is_temp) { - err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT); - } else { - err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_UPDATE); - } - + dberr_t err = *pundo ? 
DB_SUCCESS : trx_undo_assign_undo( + trx, rseg, pundo); trx_undo_t* undo = *pundo; ut_ad((err == DB_SUCCESS) == (undo != NULL)); diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index d6857b892da..946c90f457c 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -102,7 +102,7 @@ trx_rollback_to_savepoint_low( trx->error_state = DB_SUCCESS; - if (trx->has_logged()) { + if (trx->has_logged_or_recovered()) { ut_ad(trx->rsegs.m_redo.rseg != 0 || trx->rsegs.m_noredo.rseg != 0); @@ -213,24 +213,27 @@ trx_rollback_low( case TRX_STATE_PREPARED: ut_ad(!trx_is_autocommit_non_locking(trx)); - if (trx->has_logged_persistent()) { + if (trx->rsegs.m_redo.undo || trx->rsegs.m_redo.old_insert) { /* Change the undo log state back from TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE so that if the system gets killed, recovery will perform the rollback. */ - trx_undo_ptr_t* undo_ptr = &trx->rsegs.m_redo; + ut_ad(!trx->rsegs.m_redo.undo + || trx->rsegs.m_redo.undo->rseg + == trx->rsegs.m_redo.rseg); + ut_ad(!trx->rsegs.m_redo.old_insert + || trx->rsegs.m_redo.old_insert->rseg + == trx->rsegs.m_redo.rseg); mtr_t mtr; mtr.start(); mutex_enter(&trx->rsegs.m_redo.rseg->mutex); - if (undo_ptr->insert_undo != NULL) { - trx_undo_set_state_at_prepare( - trx, undo_ptr->insert_undo, - true, &mtr); + if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { + trx_undo_set_state_at_prepare(trx, undo, true, + &mtr); } - if (undo_ptr->update_undo != NULL) { - trx_undo_set_state_at_prepare( - trx, undo_ptr->update_undo, - true, &mtr); + if (trx_undo_t* undo = trx->rsegs.m_redo.old_insert) { + trx_undo_set_state_at_prepare(trx, undo, true, + &mtr); } mutex_exit(&trx->rsegs.m_redo.rseg->mutex); /* Persist the XA ROLLBACK, so that crash @@ -899,20 +902,12 @@ trx_roll_try_truncate(trx_t* trx) trx->pages_undone = 0; undo_no_t undo_no = trx->undo_no; - trx_undo_t* insert_undo = trx->rsegs.m_redo.insert_undo; - trx_undo_t* update_undo = 
trx->rsegs.m_redo.update_undo; - if (insert_undo || update_undo) { - mutex_enter(&trx->rsegs.m_redo.rseg->mutex); - if (insert_undo) { - ut_ad(insert_undo->rseg == trx->rsegs.m_redo.rseg); - trx_undo_truncate_end(insert_undo, undo_no, false); - } - if (update_undo) { - ut_ad(update_undo->rseg == trx->rsegs.m_redo.rseg); - trx_undo_truncate_end(update_undo, undo_no, false); - } - mutex_exit(&trx->rsegs.m_redo.rseg->mutex); + if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { + ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); + mutex_enter(&undo->rseg->mutex); + trx_undo_truncate_end(undo, undo_no, false); + mutex_exit(&undo->rseg->mutex); } if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { @@ -987,8 +982,8 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) } trx_undo_t* undo; - trx_undo_t* insert = trx->rsegs.m_redo.insert_undo; - trx_undo_t* update = trx->rsegs.m_redo.update_undo; + trx_undo_t* insert = trx->rsegs.m_redo.old_insert; + trx_undo_t* update = trx->rsegs.m_redo.undo; trx_undo_t* temp = trx->rsegs.m_noredo.undo; const undo_no_t limit = trx->roll_limit; @@ -999,7 +994,8 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) ut_ad(!update || !temp || update->empty || temp->empty || update->top_undo_no != temp->top_undo_no); - if (insert && !insert->empty && limit <= insert->top_undo_no) { + if (UNIV_LIKELY_NULL(insert) + && !insert->empty && limit <= insert->top_undo_no) { if (update && !update->empty && update->top_undo_no > insert->top_undo_no) { undo = update; @@ -1033,7 +1029,7 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec); if (trx_undo_rec_get_type(undo_rec) == TRX_UNDO_INSERT_REC) { - ut_ad(undo == insert || undo == temp); + ut_ad(undo == insert || undo == update || undo == temp); *roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS; } else { 
ut_ad(undo == update || undo == temp); diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index b48f3e18f1b..5470fb4baf4 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -115,29 +115,16 @@ trx_rseg_mem_free(trx_rseg_t* rseg) mutex_free(&rseg->mutex); /* There can't be any active transactions. */ - ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0); - ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0); + ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0); + ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0); - for (undo = UT_LIST_GET_FIRST(rseg->update_undo_cached); + for (undo = UT_LIST_GET_FIRST(rseg->undo_cached); undo != NULL; undo = next_undo) { next_undo = UT_LIST_GET_NEXT(undo_list, undo); - UT_LIST_REMOVE(rseg->update_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - - trx_undo_mem_free(undo); - } - - for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached); - undo != NULL; - undo = next_undo) { - - next_undo = UT_LIST_GET_NEXT(undo_list, undo); - - UT_LIST_REMOVE(rseg->insert_undo_cached, undo); + UT_LIST_REMOVE(rseg->undo_cached, undo); MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); @@ -167,10 +154,9 @@ trx_rseg_mem_create(ulint id, ulint space, ulint page_no) ? 
LATCH_ID_REDO_RSEG : LATCH_ID_NOREDO_RSEG, &rseg->mutex); - UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list); return(rseg); } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 6655be72ba1..1544480cac0 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -717,8 +717,6 @@ void trx_resurrect_table_locks( /*======================*/ trx_t* trx, /*!< in/out: transaction */ - const trx_undo_ptr_t* undo_ptr, - /*!< in: pointer to undo segment. */ const trx_undo_t* undo) /*!< in: undo log */ { mtr_t mtr; @@ -726,8 +724,6 @@ trx_resurrect_table_locks( trx_undo_rec_t* undo_rec; table_id_set tables; - ut_ad(undo == undo_ptr->insert_undo || undo == undo_ptr->update_undo); - if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) || undo->empty) { return; @@ -785,13 +781,9 @@ trx_resurrect_table_locks( } lock_table_ix_resurrect(table, trx); - DBUG_PRINT("ib_trx", - ("resurrect" TRX_ID_FMT - " table '%s' IX lock from %s undo", - trx_get_id_for_print(trx), - table->name.m_name, - undo == undo_ptr->insert_undo - ? 
"insert" : "update")); + DBUG_LOG("ib_trx", + "resurrect " << ib::hex(trx->id) + << " IX lock on " << table->name); dict_table_close(table, FALSE, FALSE); } @@ -823,7 +815,7 @@ trx_resurrect_insert( ++trx->rsegs.m_redo.rseg->trx_ref_count; *trx->xid = undo->xid; trx->id = undo->trx_id; - trx->rsegs.m_redo.insert_undo = undo; + trx->rsegs.m_redo.old_insert = undo; trx->is_recovered = true; /* This is single-threaded startup code, we do not need the @@ -873,14 +865,6 @@ trx_resurrect_insert( trx->no = TRX_ID_MAX; } - /* trx_start_low() is not called with resurrect, so need to initialize - start time here.*/ - if (trx->state == TRX_STATE_ACTIVE - || trx->state == TRX_STATE_PREPARED) { - - trx->start_time = ut_time(); - } - if (undo->dict_operation) { trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = undo->table_id; @@ -952,7 +936,7 @@ trx_resurrect_update( ++trx->rsegs.m_redo.rseg->trx_ref_count; *trx->xid = undo->xid; trx->id = undo->trx_id; - trx->rsegs.m_redo.update_undo = undo; + trx->rsegs.m_redo.undo = undo; trx->is_recovered = true; /* This is single-threaded startup code, we do not need the @@ -974,13 +958,6 @@ trx_resurrect_update( trx->no = TRX_ID_MAX; } - /* trx_start_low() is not called with resurrect, so need to initialize - start time here.*/ - if (trx->state == TRX_STATE_ACTIVE - || trx->state == TRX_STATE_PREPARED) { - trx->start_time = ut_time(); - } - if (undo->dict_operation) { trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = undo->table_id; @@ -1009,6 +986,7 @@ trx_lists_init_at_db_start() /* Look from the rollback segments if there exist undo logs for transactions. */ + const ib_time_t start_time = ut_time(); for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { trx_undo_t* undo; @@ -1020,23 +998,24 @@ trx_lists_init_at_db_start() continue; } - /* Resurrect transactions that were doing inserts. 
*/ - for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list); + /* Resurrect transactions that were doing inserts + using the old separate insert_undo log. */ + for (undo = UT_LIST_GET_FIRST(rseg->old_insert_list); undo != NULL; undo = UT_LIST_GET_NEXT(undo_list, undo)) { trx_t* trx; trx = trx_resurrect_insert(undo, rseg); + trx->start_time = start_time; trx_sys_rw_trx_add(trx); - trx_resurrect_table_locks( - trx, &trx->rsegs.m_redo, undo); + trx_resurrect_table_locks(trx, undo); } - /* Ressurrect transactions that were doing updates. */ - for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list); + /* Resurrect other transactions. */ + for (undo = UT_LIST_GET_FIRST(rseg->undo_list); undo != NULL; undo = UT_LIST_GET_NEXT(undo_list, undo)) { @@ -1049,6 +1028,7 @@ trx_lists_init_at_db_start() if (trx == NULL) { trx = trx_allocate_for_background(); + trx->start_time = start_time; ut_d(trx->start_file = __FILE__); ut_d(trx->start_line = __LINE__); @@ -1058,8 +1038,7 @@ trx_lists_init_at_db_start() trx_sys_rw_trx_add(trx); - trx_resurrect_table_locks( - trx, &trx->rsegs.m_redo, undo); + trx_resurrect_table_locks(trx, undo); } } @@ -1363,7 +1342,7 @@ trx_start_low( /** Set the serialisation number for a persistent committed transaction. 
@param[in,out] trx committed transaction with persistent changes -@param[in,out] rseg rollback segment for update_undo, or NULL */ +@param[in,out] rseg rollback segment for undo, or NULL */ static void trx_serialise(trx_t* trx, trx_rseg_t* rseg) @@ -1439,37 +1418,49 @@ trx_write_serialisation_history( } if (!trx->rsegs.m_redo.rseg) { - ut_ad(!trx->rsegs.m_redo.insert_undo); - ut_ad(!trx->rsegs.m_redo.update_undo); + ut_ad(!trx->rsegs.m_redo.undo); + ut_ad(!trx->rsegs.m_redo.old_insert); return false; } - trx_undo_t* insert = trx->rsegs.m_redo.insert_undo; - trx_undo_t* update = trx->rsegs.m_redo.update_undo; + trx_undo_t* undo = trx->rsegs.m_redo.undo; + trx_undo_t*& old_insert = trx->rsegs.m_redo.old_insert; - if (!insert && !update) { + if (!undo && !old_insert) { return false; } ut_ad(!trx->read_only); - trx_rseg_t* update_rseg = update ? trx->rsegs.m_redo.rseg : NULL; + trx_rseg_t* undo_rseg = undo ? undo->rseg : NULL; + ut_ad(!undo || undo->rseg == trx->rsegs.m_redo.rseg); mutex_enter(&trx->rsegs.m_redo.rseg->mutex); /* Assign the transaction serialisation number and add any - update_undo log to the purge queue. */ - trx_serialise(trx, update_rseg); + undo log to the purge queue. */ + trx_serialise(trx, undo_rseg); /* It is not necessary to acquire trx->undo_mutex here because only a single OS thread is allowed to commit this transaction. */ - if (insert) { - trx_undo_set_state_at_finish(insert, mtr); - } - if (update) { - /* The undo logs and possible delete-marked records - for updates and deletes will be purged later. 
*/ - page_t* undo_hdr_page = trx_undo_set_state_at_finish( - update, mtr); + if (UNIV_LIKELY_NULL(old_insert)) { + page_t* undo_hdr_page = trx_undo_set_state_at_finish( + old_insert, mtr); + trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; + trx_purge_add_update_undo_to_history(trx, undo_hdr_page, mtr); + UT_LIST_REMOVE(rseg->old_insert_list, old_insert); + if (old_insert->state == TRX_UNDO_CACHED) { + UT_LIST_ADD_FIRST(rseg->undo_cached, old_insert); + MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); + } else { + ut_ad(old_insert->state == TRX_UNDO_TO_PURGE); + trx_undo_mem_free(old_insert); + } + old_insert = NULL; + } + if (undo) { + /* The undo logs will be processed and purged later. */ + page_t* undo_hdr_page = trx_undo_set_state_at_finish( + undo, mtr); trx_undo_update_cleanup(trx, undo_hdr_page, mtr); } @@ -1786,7 +1777,7 @@ trx_commit_in_memory( } } - ut_ad(!trx->rsegs.m_redo.update_undo); + ut_ad(!trx->rsegs.m_redo.undo); if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) { mutex_enter(&rseg->mutex); @@ -1794,14 +1785,14 @@ trx_commit_in_memory( --rseg->trx_ref_count; mutex_exit(&rseg->mutex); - if (trx_undo_t*& insert = trx->rsegs.m_redo.insert_undo) { + if (trx_undo_t*& insert = trx->rsegs.m_redo.old_insert) { ut_ad(insert->rseg == rseg); trx_undo_commit_cleanup(insert, false); insert = NULL; } } - ut_ad(!trx->rsegs.m_redo.insert_undo); + ut_ad(!trx->rsegs.m_redo.old_insert); if (mtr != NULL) { if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) { @@ -2012,7 +2003,7 @@ trx_commit( DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start", DBUG_SUICIDE();); - if (trx->has_logged()) { + if (trx->has_logged_or_recovered()) { mtr = &local_mtr; mtr_start_sync(mtr); } else { @@ -2034,9 +2025,9 @@ trx_cleanup_at_db_startup( { ut_ad(trx->is_recovered); ut_ad(!trx->rsegs.m_noredo.undo); - ut_ad(!trx->rsegs.m_redo.update_undo); + ut_ad(!trx->rsegs.m_redo.undo); - if (trx_undo_t*& undo = trx->rsegs.m_redo.insert_undo) { + if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) { 
ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); trx_undo_commit_cleanup(undo, false); undo = NULL; @@ -2666,6 +2657,9 @@ static lsn_t trx_prepare_low(trx_t* trx) { + ut_ad(!trx->rsegs.m_redo.old_insert); + ut_ad(!trx->is_recovered); + mtr_t mtr; /* It is not necessary to acquire trx->undo_mutex here because @@ -2685,15 +2679,15 @@ trx_prepare_low(trx_t* trx) mtr.commit(); } - trx_undo_t* insert = trx->rsegs.m_redo.insert_undo; - trx_undo_t* update = trx->rsegs.m_redo.update_undo; + trx_undo_t* undo = trx->rsegs.m_redo.undo; - if (!insert && !update) { + if (!undo) { /* There were no changes to persistent tables. */ return(0); } trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; + ut_ad(undo->rseg == rseg); mtr.start(true); @@ -2703,17 +2697,7 @@ trx_prepare_low(trx_t* trx) world, at the serialization point of lsn. */ mutex_enter(&rseg->mutex); - - if (insert) { - ut_ad(insert->rseg == rseg); - trx_undo_set_state_at_prepare(trx, insert, false, &mtr); - } - - if (update) { - ut_ad(update->rseg == rseg); - trx_undo_set_state_at_prepare(trx, update, false, &mtr); - } - + trx_undo_set_state_at_prepare(trx, undo, false, &mtr); mutex_exit(&rseg->mutex); /* Make the XA PREPARE durable. 
*/ diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 3eab3733f8f..8f343ce2e7e 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -100,7 +100,6 @@ void trx_undo_page_init( /*===============*/ page_t* undo_page, /*!< in: undo log segment page */ - ulint type, /*!< in: undo log segment type */ mtr_t* mtr); /*!< in: mtr */ /********************************************************************//** @@ -112,26 +111,11 @@ trx_undo_mem_create( /*================*/ trx_rseg_t* rseg, /*!< in: rollback segment memory object */ ulint id, /*!< in: slot index within rseg */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ trx_id_t trx_id, /*!< in: id of the trx for which the undo log is created */ const XID* xid, /*!< in: X/Open XA transaction identification*/ ulint page_no,/*!< in: undo log header page number */ ulint offset);/*!< in: undo log header byte offset on page */ -/***************************************************************//** -Initializes a cached insert undo log header page for new use. NOTE that this -function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change -the operation of this function! -@return undo log header byte offset on page */ -static -ulint -trx_undo_insert_header_reuse( -/*=========================*/ - page_t* undo_page, /*!< in/out: insert undo log segment - header page, x-latched */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** If an update undo log can be discarded immediately, this function frees the space, resetting the page to the proper state for caching. 
*/ @@ -341,12 +325,11 @@ void trx_undo_page_init_log( /*===================*/ page_t* undo_page, /*!< in: undo log page */ - ulint type, /*!< in: undo log type */ mtr_t* mtr) /*!< in: mtr */ { mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr); - mlog_catenate_ulint_compressed(mtr, type); + mlog_catenate_ulint_compressed(mtr, 0); } /***********************************************************//** @@ -360,9 +343,9 @@ trx_undo_parse_page_init( page_t* page, /*!< in: page or NULL */ mtr_t* mtr) /*!< in: mtr or NULL */ { - ulint type; - - type = mach_parse_compressed(&ptr, end_ptr); + if (mach_parse_compressed(&ptr, end_ptr)) { + recv_sys->found_corrupt_log = true; + } if (ptr == NULL) { @@ -370,7 +353,7 @@ trx_undo_parse_page_init( } if (page) { - trx_undo_page_init(page, type, mtr); + trx_undo_page_init(page, mtr); } return(const_cast(ptr)); @@ -383,14 +366,13 @@ void trx_undo_page_init( /*===============*/ page_t* undo_page, /*!< in: undo log segment page */ - ulint type, /*!< in: undo log segment type */ mtr_t* mtr) /*!< in: mtr */ { trx_upagef_t* page_hdr; page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type); + *reinterpret_cast(page_hdr + TRX_UNDO_PAGE_TYPE) = 0; mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); @@ -399,7 +381,7 @@ trx_undo_page_init( fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG); - trx_undo_page_init_log(undo_page, type, mtr); + trx_undo_page_init_log(undo_page, mtr); } /***************************************************************//** @@ -413,8 +395,6 @@ trx_undo_seg_create( trx_rseg_t* rseg MY_ATTRIBUTE((unused)),/*!< in: rollback segment */ trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page x-latched */ - ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ ulint* id, /*!< out: slot index within rseg header */ page_t** undo_page, /*!< out: segment header page x-latched, NULL @@ -435,9 +415,6 @@ 
trx_undo_seg_create( ut_ad(rseg_hdr != NULL); ut_ad(mutex_own(&(rseg->mutex))); - /* fputs(type == TRX_UNDO_INSERT - ? "Creating insert undo log segment\n" - : "Creating update undo log segment\n", stderr); */ slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr); if (slot_no == ULINT_UNDEFINED) { @@ -477,7 +454,7 @@ trx_undo_seg_create( page_hdr = *undo_page + TRX_UNDO_PAGE_HDR; seg_hdr = *undo_page + TRX_UNDO_SEG_HDR; - trx_undo_page_init(*undo_page, type, mtr); + trx_undo_page_init(*undo_page, mtr); mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE, @@ -674,23 +651,7 @@ trx_undo_header_add_space_for_xid( MLOG_2BYTES, mtr); } -/**********************************************************************//** -Writes the mtr log entry of an undo log header reuse. */ -UNIV_INLINE -void -trx_undo_insert_header_reuse_log( -/*=============================*/ - const page_t* undo_page, /*!< in: undo log header page */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr); - - mlog_catenate_ull_compressed(mtr, trx_id); -} - -/** Parse the redo log entry of an undo log page header create or reuse. -@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE +/** Parse the redo log entry of an undo log page header create. 
@param[in] ptr redo log record @param[in] end_ptr end of log buffer @param[in,out] page page frame or NULL @@ -698,7 +659,6 @@ trx_undo_insert_header_reuse_log( @return end of log record or NULL */ byte* trx_undo_parse_page_header( - mlog_id_t type, const byte* ptr, const byte* end_ptr, page_t* page, @@ -707,82 +667,13 @@ trx_undo_parse_page_header( trx_id_t trx_id = mach_u64_parse_compressed(&ptr, end_ptr); if (ptr != NULL && page != NULL) { - switch (type) { - case MLOG_UNDO_HDR_CREATE: - trx_undo_header_create(page, trx_id, mtr); - return(const_cast<byte*>(ptr)); - case MLOG_UNDO_HDR_REUSE: - trx_undo_insert_header_reuse(page, trx_id, mtr); - return(const_cast<byte*>(ptr)); - default: - break; - } - ut_ad(0); + trx_undo_header_create(page, trx_id, mtr); + return(const_cast<byte*>(ptr)); } return(const_cast<byte*>(ptr)); } -/***************************************************************//** -Initializes a cached insert undo log header page for new use. NOTE that this -function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change -the operation of this function! 
-@return undo log header byte offset on page */ -static -ulint -trx_undo_insert_header_reuse( -/*=========================*/ - page_t* undo_page, /*!< in/out: insert undo log segment - header page, x-latched */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - trx_ulogf_t* log_hdr; - ulint free; - ulint new_free; - - ut_ad(mtr && undo_page); - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE; - - ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100); - - log_hdr = undo_page + free; - - new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE; - - /* Insert undo data is not needed after commit: we may free all - the space on the page */ - - ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_INSERT); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE); - - log_hdr = undo_page + free; - - mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); - mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - - mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); - mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); - - /* Write the log record MLOG_UNDO_HDR_REUSE */ - trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr); - - return(free); -} - /**********************************************************************//** Writes the redo log entry of an update undo log header discard. 
*/ UNIV_INLINE @@ -905,7 +796,7 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr) buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE); undo->last_page_no = new_block->page.id.page_no(); - trx_undo_page_init(new_block->frame, undo->type, mtr); + trx_undo_page_init(new_block->frame, mtr); flst_add_last(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_page, @@ -1212,28 +1103,20 @@ trx_undo_mem_create_at_db_start( mtr_t* mtr) /*!< in: mtr */ { page_t* undo_page; - trx_upagef_t* page_header; trx_usegf_t* seg_header; trx_ulogf_t* undo_header; trx_undo_t* undo; - ulint type; ulint state; trx_id_t trx_id; ulint offset; - fil_addr_t last_addr; - page_t* last_page; - trx_undo_rec_t* rec; XID xid; - ibool xid_exists = FALSE; ut_a(id < TRX_RSEG_N_SLOTS); undo_page = trx_undo_page_get(page_id_t(rseg->space, page_no), mtr); - - page_header = undo_page + TRX_UNDO_PAGE_HDR; - - type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES, - mtr); + const ulint type = mach_read_from_2( + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + undo_page); + ut_ad(type == 0 || type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE); seg_header = undo_page + TRX_UNDO_SEG_HDR; state = mach_read_from_2(seg_header + TRX_UNDO_STATE); @@ -1244,75 +1127,62 @@ trx_undo_mem_create_at_db_start( trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID); - xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS, - MLOG_1BYTE, mtr); + const bool xid_exists = mtr_read_ulint( + undo_header + TRX_UNDO_XID_EXISTS, MLOG_1BYTE, mtr); /* Read X/Open XA transaction identification if it exists, or set it to NULL. 
*/ xid.null(); - if (xid_exists == TRUE) { + if (xid_exists) { trx_undo_read_xid(undo_header, &xid); } mutex_enter(&(rseg->mutex)); - undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid, - page_no, offset); + undo = trx_undo_mem_create(rseg, id, trx_id, &xid, page_no, offset); mutex_exit(&(rseg->mutex)); undo->dict_operation = mtr_read_ulint( undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr); undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID); - undo->state = state; undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST); - /* If the log segment is being freed, the page list is inconsistent! */ - if (state == TRX_UNDO_TO_FREE) { + if (UNIV_UNLIKELY(state == TRX_UNDO_TO_FREE)) { + /* This is an old-format insert_undo log segment that + is being freed. The page list is inconsistent. */ + ut_ad(type == TRX_UNDO_INSERT); + state = TRX_UNDO_TO_PURGE; + } else { + fil_addr_t last_addr = flst_get_last( + seg_header + TRX_UNDO_PAGE_LIST, mtr); - goto add_to_list; + undo->last_page_no = last_addr.page; + undo->top_page_no = last_addr.page; + + page_t* last_page = trx_undo_page_get( + page_id_t(rseg->space, undo->last_page_no), mtr); + + const trx_undo_rec_t* rec = trx_undo_page_get_last_rec( + last_page, page_no, offset); + + undo->empty = !rec; + if (rec) { + undo->top_offset = rec - last_page; + undo->top_undo_no = trx_undo_rec_get_undo_no(rec); + } } - last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr); + undo->state = state; - undo->last_page_no = last_addr.page; - undo->top_page_no = last_addr.page; - - last_page = trx_undo_page_get( - page_id_t(rseg->space, undo->last_page_no), mtr); - - rec = trx_undo_page_get_last_rec(last_page, page_no, offset); - - if (rec == NULL) { - undo->empty = TRUE; + if (state != TRX_UNDO_CACHED) { + UT_LIST_ADD_LAST(type == TRX_UNDO_INSERT + ? 
rseg->old_insert_list + : rseg->undo_list, undo); } else { - undo->empty = FALSE; - undo->top_offset = rec - last_page; - undo->top_undo_no = trx_undo_rec_get_undo_no(rec); - } -add_to_list: - if (type == TRX_UNDO_INSERT) { - if (state != TRX_UNDO_CACHED) { - - UT_LIST_ADD_LAST(rseg->insert_undo_list, undo); - } else { - - UT_LIST_ADD_LAST(rseg->insert_undo_cached, undo); - - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); - } - } else { - ut_ad(type == TRX_UNDO_UPDATE); - if (state != TRX_UNDO_CACHED) { - - UT_LIST_ADD_LAST(rseg->update_undo_list, undo); - } else { - - UT_LIST_ADD_LAST(rseg->update_undo_cached, undo); - - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); - } + UT_LIST_ADD_LAST(rseg->undo_cached, undo); + MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); } return(undo); @@ -1383,8 +1253,6 @@ trx_undo_mem_create( /*================*/ trx_rseg_t* rseg, /*!< in: rollback segment memory object */ ulint id, /*!< in: slot index within rseg */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ trx_id_t trx_id, /*!< in: id of the trx for which the undo log is created */ const XID* xid, /*!< in: X/Open transaction identification */ @@ -1405,7 +1273,6 @@ trx_undo_mem_create( } undo->id = id; - undo->type = type; undo->state = TRX_UNDO_ACTIVE; undo->del_marks = FALSE; undo->trx_id = trx_id; @@ -1479,8 +1346,6 @@ trx_undo_create( /*============*/ trx_t* trx, /*!< in: transaction */ trx_rseg_t* rseg, /*!< in: rollback segment memory copy */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ trx_id_t trx_id, /*!< in: id of the trx for which the undo log is created */ const XID* xid, /*!< in: X/Open transaction identification*/ @@ -1506,8 +1371,7 @@ trx_undo_create( rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); - err = trx_undo_seg_create(rseg, rseg_header, type, &id, - &undo_page, mtr); + err = trx_undo_seg_create(rseg, rseg_header, &id, &undo_page, mtr); if (err != DB_SUCCESS) { /* Did not succeed */ @@ 
-1523,8 +1387,7 @@ trx_undo_create( trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); - *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, - page_no, offset); + *undo = trx_undo_mem_create(rseg, id, trx_id, xid, page_no, offset); if (*undo == NULL) { err = DB_OUT_OF_MEMORY; @@ -1544,8 +1407,6 @@ trx_undo_reuse_cached( /*==================*/ trx_t* trx, /*!< in: transaction */ trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ trx_id_t trx_id, /*!< in: id of the trx for which the undo log is used */ const XID* xid, /*!< in: X/Open XA transaction identification */ @@ -1557,30 +1418,12 @@ trx_undo_reuse_cached( ut_ad(mutex_own(&(rseg->mutex))); - if (type == TRX_UNDO_INSERT) { - - undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached); - if (undo == NULL) { - - return(NULL); - } - - UT_LIST_REMOVE(rseg->insert_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - } else { - ut_ad(type == TRX_UNDO_UPDATE); - - undo = UT_LIST_GET_FIRST(rseg->update_undo_cached); - if (undo == NULL) { - - return(NULL); - } - - UT_LIST_REMOVE(rseg->update_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); + undo = UT_LIST_GET_FIRST(rseg->undo_cached); + if (undo == NULL) { + return(NULL); } + UT_LIST_REMOVE(rseg->undo_cached, undo); + MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); ut_ad(undo->size == 1); ut_a(undo->id < TRX_RSEG_N_SLOTS); @@ -1588,22 +1431,9 @@ trx_undo_reuse_cached( undo_page = trx_undo_page_get( page_id_t(undo->space, undo->hdr_page_no), mtr); - if (type == TRX_UNDO_INSERT) { - offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); - - trx_undo_header_add_space_for_xid( - undo_page, undo_page + offset, mtr); - } else { - ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_UPDATE); - - offset = trx_undo_header_create(undo_page, trx_id, mtr); - - trx_undo_header_add_space_for_xid( - 
undo_page, undo_page + offset, mtr); - } + offset = trx_undo_header_create(undo_page, trx_id, mtr); + trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); return(undo); @@ -1652,18 +1482,13 @@ A new undo log is created or a cached undo log reused. @param[in,out] trx transaction @param[in] rseg rollback segment @param[out] undo the undo log -@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE @retval DB_SUCCESS on success @retval DB_TOO_MANY_CONCURRENT_TRXS @retval DB_OUT_OF_FILE_SPACE @retval DB_READ_ONLY @retval DB_OUT_OF_MEMORY */ dberr_t -trx_undo_assign_undo( - trx_t* trx, - trx_rseg_t* rseg, - trx_undo_t** undo, - ulint type) +trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) { const bool is_temp = rseg == trx->rsegs.m_noredo.rseg; mtr_t mtr; @@ -1672,40 +1497,31 @@ trx_undo_assign_undo( ut_ad(mutex_own(&trx->undo_mutex)); ut_ad(rseg == trx->rsegs.m_redo.rseg || rseg == trx->rsegs.m_noredo.rseg); - ut_ad(type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE); + ut_ad(undo == (is_temp + ? &trx->rsegs.m_noredo.undo + : &trx->rsegs.m_redo.undo)); mtr.start(trx); if (is_temp) { mtr.set_log_mode(MTR_LOG_NO_REDO); - ut_ad(undo == &trx->rsegs.m_noredo.undo); - } else { - ut_ad(undo == (type == TRX_UNDO_INSERT - ? &trx->rsegs.m_redo.insert_undo - : &trx->rsegs.m_redo.update_undo)); } mutex_enter(&rseg->mutex); - *undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, trx->xid, - &mtr); + *undo = trx_undo_reuse_cached(trx, rseg, trx->id, trx->xid, &mtr); if (*undo == NULL) { - err = trx_undo_create(trx, rseg, type, trx->id, trx->xid, + err = trx_undo_create(trx, rseg, trx->id, trx->xid, undo, &mtr); if (err != DB_SUCCESS) { goto func_exit; } } - if (is_temp) { - UT_LIST_ADD_FIRST(rseg->insert_undo_list, *undo); - } else { - UT_LIST_ADD_FIRST(type == TRX_UNDO_INSERT - ? 
rseg->insert_undo_list - : rseg->update_undo_list, *undo); - if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { - trx_undo_mark_as_dict_operation(trx, *undo, &mtr); - } + UT_LIST_ADD_FIRST(rseg->undo_list, *undo); + + if (!is_temp && trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { + trx_undo_mark_as_dict_operation(trx, *undo, &mtr); } func_exit: @@ -1742,10 +1558,6 @@ trx_undo_set_state_at_finish( < TRX_UNDO_PAGE_REUSE_LIMIT) { state = TRX_UNDO_CACHED; - - } else if (undo->type == TRX_UNDO_INSERT) { - - state = TRX_UNDO_TO_FREE; } else { state = TRX_UNDO_TO_PURGE; } @@ -1759,7 +1571,7 @@ trx_undo_set_state_at_finish( /** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK. @param[in,out] trx transaction -@param[in,out] undo insert_undo or update_undo log +@param[in,out] undo undo log @param[in] rollback false=XA PREPARE, true=XA ROLLBACK @param[in,out] mtr mini-transaction @return undo log segment header page, x-latched */ @@ -1824,20 +1636,20 @@ trx_undo_update_cleanup( x-latched */ mtr_t* mtr) /*!< in: mtr */ { - trx_undo_t* undo = trx->rsegs.m_redo.update_undo; + trx_undo_t* undo = trx->rsegs.m_redo.undo; trx_rseg_t* rseg = undo->rseg; ut_ad(mutex_own(&rseg->mutex)); trx_purge_add_update_undo_to_history(trx, undo_page, mtr); - UT_LIST_REMOVE(rseg->update_undo_list, undo); + UT_LIST_REMOVE(rseg->undo_list, undo); - trx->rsegs.m_redo.update_undo = NULL; + trx->rsegs.m_redo.undo = NULL; if (undo->state == TRX_UNDO_CACHED) { - UT_LIST_ADD_FIRST(rseg->update_undo_cached, undo); + UT_LIST_ADD_FIRST(rseg->undo_cached, undo); MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); } else { @@ -1847,7 +1659,7 @@ trx_undo_update_cleanup( } } -/** Free an insert or temporary undo log after commit or rollback. +/** Free an old insert or temporary undo log after commit or rollback. The information is not needed after a commit or rollback, therefore the data can be discarded. 
@param[in,out] undo undo log @@ -1857,20 +1669,22 @@ trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp) { trx_rseg_t* rseg = undo->rseg; ut_ad(is_temp == !rseg->is_persistent()); + ut_ad(!is_temp || 0 == UT_LIST_GET_LEN(rseg->old_insert_list)); mutex_enter(&rseg->mutex); - UT_LIST_REMOVE(rseg->insert_undo_list, undo); + UT_LIST_REMOVE(is_temp ? rseg->undo_list : rseg->old_insert_list, + undo); if (undo->state == TRX_UNDO_CACHED) { - UT_LIST_ADD_FIRST(rseg->insert_undo_cached, undo); + UT_LIST_ADD_FIRST(rseg->undo_cached, undo); MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); } else { - ut_ad(undo->state == TRX_UNDO_TO_FREE); + ut_ad(undo->state == TRX_UNDO_TO_PURGE); /* Delete first the undo log segment in the file */ mutex_exit(&rseg->mutex); - trx_undo_seg_free(undo, is_temp); + trx_undo_seg_free(undo, true); mutex_enter(&rseg->mutex); ut_ad(rseg->curr_size > undo->size); @@ -1891,8 +1705,8 @@ trx_undo_free_prepared( { ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); - if (trx->rsegs.m_redo.update_undo) { - switch (trx->rsegs.m_redo.update_undo->state) { + if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) { + switch (undo->state) { case TRX_UNDO_PREPARED: break; case TRX_UNDO_ACTIVE: @@ -1906,15 +1720,13 @@ trx_undo_free_prepared( ut_error; } - UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->update_undo_list, - trx->rsegs.m_redo.update_undo); - trx_undo_mem_free(trx->rsegs.m_redo.update_undo); - - trx->rsegs.m_redo.update_undo = NULL; + UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list, undo); + trx_undo_mem_free(undo); + undo = NULL; } - if (trx->rsegs.m_redo.insert_undo) { - switch (trx->rsegs.m_redo.insert_undo->state) { + if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) { + switch (undo->state) { case TRX_UNDO_PREPARED: break; case TRX_UNDO_ACTIVE: @@ -1928,18 +1740,15 @@ trx_undo_free_prepared( ut_error; } - UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->insert_undo_list, - trx->rsegs.m_redo.insert_undo); - trx_undo_mem_free(trx->rsegs.m_redo.insert_undo); - - 
trx->rsegs.m_redo.insert_undo = NULL; + UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->old_insert_list, undo); + trx_undo_mem_free(undo); + undo = NULL; } if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) { ut_a(undo->state == TRX_UNDO_PREPARED); - UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->insert_undo_list, - undo); + UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list, undo); trx_undo_mem_free(undo); undo = NULL; } @@ -1991,37 +1800,22 @@ trx_undo_truncate_tablespace( /* Before re-initialization ensure that we free the existing structure. There can't be any active transactions. */ - ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0); - ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0); + ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0); trx_undo_t* next_undo; - for (trx_undo_t* undo = - UT_LIST_GET_FIRST(rseg->update_undo_cached); + for (trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached); undo != NULL; undo = next_undo) { next_undo = UT_LIST_GET_NEXT(undo_list, undo); - UT_LIST_REMOVE(rseg->update_undo_cached, undo); + UT_LIST_REMOVE(rseg->undo_cached, undo); MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); trx_undo_mem_free(undo); } - for (trx_undo_t* undo = - UT_LIST_GET_FIRST(rseg->insert_undo_cached); - undo != NULL; - undo = next_undo) { - - next_undo = UT_LIST_GET_NEXT(undo_list, undo); - UT_LIST_REMOVE(rseg->insert_undo_cached, undo); - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - trx_undo_mem_free(undo); - } - - UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list); rseg->max_size = mtr_read_ulint( rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, &mtr);