From 7f5849a809670c6466a71dc93133ca0a1504de05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Sun, 7 Apr 2019 12:05:12 +0300 Subject: [PATCH 01/12] MDEV-18309: Remove unused code --- storage/innobase/dict/dict0load.cc | 8 +++----- storage/innobase/fil/fil0fil.cc | 18 ------------------ storage/innobase/include/fil0fil.h | 2 -- storage/innobase/row/row0mysql.cc | 2 +- 4 files changed, 4 insertions(+), 26 deletions(-) diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index fc399faaea3..508dde92269 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1449,7 +1449,7 @@ next: /* Now that we have the proper name for this tablespace, look to see if it is already in the tablespace cache. */ if (fil_space_for_table_exists_in_mem( - space_id, table_name.m_name, NULL, flags)) { + space_id, table_name.m_name, flags)) { /* Recovery can open a datafile that does not match SYS_DATAFILES. If they don't match, update SYS_DATAFILES. */ @@ -2817,13 +2817,11 @@ dict_load_table( /** Opens a tablespace for dict_load_table_one() @param[in,out] table A table that refers to the tablespace to open -@param[in] heap A memory heap @param[in] ignore_err Whether to ignore an error. */ UNIV_INLINE void dict_load_tablespace( dict_table_t* table, - mem_heap_t* heap, dict_err_ignore_t ignore_err) { ut_ad(!dict_table_is_temporary(table)); @@ -2844,7 +2842,7 @@ dict_load_tablespace( /* The tablespace may already be open. */ if (fil_space_for_table_exists_in_mem( - table->space, space_name, heap, table->flags)) { + table->space, space_name, table->flags)) { return; } @@ -2994,7 +2992,7 @@ err_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); - dict_load_tablespace(table, heap, ignore_err); + dict_load_tablespace(table, ignore_err); dict_load_columns(table, heap); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 3c9546b0304..0e6f1b05cd6 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -4482,22 +4482,6 @@ fil_file_readdir_next_file( return(-1); } -/*******************************************************************//** -Report that a tablespace for a table was not found. */ -static -void -fil_report_missing_tablespace( -/*===========================*/ - const char* name, /*!< in: table name */ - ulint space_id) /*!< in: table's space id */ -{ - ib::error() << "Table " << name - << " in the InnoDB data dictionary has tablespace id " - << space_id << "," - " but tablespace with that id or name does not exist. Have" - " you deleted or moved .ibd files?"; -} - /** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations. (Typically when upgrading from MariaDB 10.1.0..10.1.20.) @param[in] space_id tablespace ID @@ -4540,14 +4524,12 @@ memory cache. Note that if we have not done a crash recovery at the database startup, there may be many tablespaces which are not yet in the memory cache. @param[in] id Tablespace ID @param[in] name Tablespace name used in fil_space_create(). -@param[in] heap Heap memory @param[in] table_flags table flags @return true if a matching tablespace exists in the memory cache */ bool fil_space_for_table_exists_in_mem( ulint id, const char* name, - mem_heap_t* heap, ulint table_flags) { fil_space_t* space; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 1096b990146..30b2d205cf8 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1162,14 +1162,12 @@ memory cache. Note that if we have not done a crash recovery at the database startup, there may be many tablespaces which are not yet in the memory cache. @param[in] id Tablespace ID @param[in] name Tablespace name used in fil_space_create(). -@param[in] heap Heap memory @param[in] table_flags table flags @return true if a matching tablespace exists in the memory cache */ bool fil_space_for_table_exists_in_mem( ulint id, const char* name, - mem_heap_t* heap, ulint table_flags); /** Try to extend a tablespace if it is smaller than the specified size. diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 76024bbe170..e19501edea4 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3231,7 +3231,7 @@ row_drop_single_table_tablespace( /* If the tablespace is not in the cache, just delete the file. */ if (!fil_space_for_table_exists_in_mem( - space_id, tablename, NULL, table_flags)) { + space_id, tablename, table_flags)) { /* Force a delete of any discarded or temporary files. */ fil_delete_file(filepath); From 7d720ca8de1b19cff73f0833f1d95924f1088487 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 7 Apr 2019 00:52:05 +0200 Subject: [PATCH 02/12] cmake: don't use generated files to detect a submodule Even if Makefile for some reason was checked in in a submodule, it is still a generated file, will be cleaned, won't be in a source package. One cannot jump to conclusions if it doesn't exist. --- storage/rocksdb/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index acbab49c586..4ffa28d4bdb 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -5,8 +5,8 @@ MACRO(SKIP_ROCKSDB_PLUGIN msg) RETURN() ENDMACRO() -IF (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/Makefile") - SKIP_ROCKSDB_PLUGIN("Missing Makefile in rocksdb directory. Try \"git submodule update\".") +IF (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/CMakeLists.txt") + SKIP_ROCKSDB_PLUGIN("Missing CMakeLists.txt in rocksdb directory. Try \"git submodule update\".") ENDIF() CHECK_LIBRARY_EXISTS(rt timer_delete "" HAVE_TIMER_DELETE) From 5023e465a9e82503c7521b2d555c8d2350528911 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 7 Apr 2019 15:49:30 +0200 Subject: [PATCH 03/12] copy-paste error fixed thanks @FaramosCZ --- sql/sql_table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 071f4763b34..bb6945c7e01 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -6091,7 +6091,7 @@ drop_create_field: Key_part_spec *kp; if ((kp= part_it++)) chkname= kp->field_name.str; - if (keyname == NULL) + if (chkname == NULL) continue; } if (key->type == chk_key->type && From e124ff17e07f395c34c8bfea31a2b35022919601 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 8 Apr 2019 10:03:46 +0200 Subject: [PATCH 04/12] cmake: force Boost dependency as needed because FindBoost.cmake won't do it --- cmake/build_depends.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/build_depends.cmake b/cmake/build_depends.cmake index 0d17c22cf98..333df580200 100644 --- a/cmake/build_depends.cmake +++ b/cmake/build_depends.cmake @@ -16,6 +16,11 @@ IF(RPM) ENDIF() ENDMACRO() + # FindBoost.cmake doesn't leave any trace, do it here + IF (Boost_INCLUDE_DIR) + FIND_FILE(Boost_config_hpp boost/config.hpp PATHS ${Boost_INCLUDE_DIR}) + ENDIF() + GET_CMAKE_PROPERTY(ALL_VARS CACHE_VARIABLES) FOREACH (V ${ALL_VARS}) GET_PROPERTY(H CACHE ${V} PROPERTY HELPSTRING) From 4b822111ef043286806225e903cfe4de584b2c6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Apr 2019 14:41:02 +0300 Subject: [PATCH 05/12] MDEV-8139: Clean up the freeing of B-tree pages btr_page_free(): Renamed from btr_page_free_low(). If scrubbing is enabled, zero out the page with proper redo logging. Only pass ahi=true to fseg_free_page() if the page is actually indexed. fil_space_t::modify_check(): Renamed from fsp_space_modify_check(). fsp_init_file_page(): Define inline. --- storage/innobase/btr/btr0btr.cc | 164 ++++++----------------------- storage/innobase/btr/btr0bulk.cc | 2 +- storage/innobase/btr/btr0cur.cc | 10 +- storage/innobase/fsp/fsp0fsp.cc | 80 +++++--------- storage/innobase/include/btr0btr.h | 32 ++---- storage/innobase/include/fil0fil.h | 8 +- storage/innobase/include/fsp0fsp.h | 20 ++++ 7 files changed, 98 insertions(+), 218 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 53e50cc76a8..a8699e4fb55 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -715,159 +715,57 @@ btr_page_free_for_ibuf( mtr)); } -/**************************************************************//** -Frees a file page used in an index tree. Can be used also to (BLOB) -external storage pages. */ -void -btr_page_free_low( -/*==============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */ - bool blob, /*!< in: blob page */ - mtr_t* mtr) /*!< in: mtr */ +/** Free an index page. +@param[in,out] index index tree +@param[in,out] block block to be freed +@param[in,out] mtr mini-transaction +@param[in] blob whether this is freeing a BLOB page */ +void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, + bool blob) { - fseg_header_t* seg_header; - page_t* root; - ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!block->index || !blob); + ut_ad(!block->index || page_is_leaf(block->frame)); +#endif + ut_ad(index->space == block->page.id.space()); + /* The root page is freed by btr_free_root(). */ + ut_ad(block->page.id.page_no() != index->page); + ut_ad(mtr->is_named_space(index->space)); + /* The page gets invalid for optimistic searches: increment the frame modify clock */ buf_block_modify_clock_inc(block); - if (blob) { - ut_a(level == 0); - } - - bool scrub = srv_immediate_scrub_data_uncompressed; - /* scrub page */ - if (scrub && blob) { - /* blob page: scrub entire page */ - // TODO(jonaso): scrub only what is actually needed - page_t* page = buf_block_get_frame(block); - memset(page + PAGE_HEADER, 0, - UNIV_PAGE_SIZE - PAGE_HEADER); -#ifdef UNIV_DEBUG_SCRUBBING - fprintf(stderr, - "btr_page_free_low: scrub blob page %lu/%lu\n", - buf_block_get_space(block), - buf_block_get_page_no(block)); -#endif /* UNIV_DEBUG_SCRUBBING */ - } else if (scrub) { - /* scrub records on page */ - - /* TODO(jonaso): in theory we could clear full page - * but, since page still remains in buffer pool, and - * gets flushed etc. Lots of routines validates consistency - * of it. And in order to remain structurally consistent - * we clear each record by it own - * - * NOTE: The TODO below mentions removing page from buffer pool - * and removing redo entries, once that is done, clearing full - * pages should be possible - */ - uint cnt = 0; - ulint bytes = 0; - page_t* page = buf_block_get_frame(block); - mem_heap_t* heap = NULL; - ulint* offsets = NULL; - rec_t* rec = page_rec_get_next(page_get_infimum_rec(page)); - while (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - page_is_leaf(page), - ULINT_UNDEFINED, - &heap); - ulint size = rec_offs_data_size(offsets); - memset(rec, 0, size); - rec = page_rec_get_next(rec); - cnt++; - bytes += size; - } -#ifdef UNIV_DEBUG_SCRUBBING - fprintf(stderr, - "btr_page_free_low: scrub %lu/%lu - " - "%u records " ULINTPF " bytes\n", - buf_block_get_space(block), - buf_block_get_page_no(block), - cnt, bytes); -#endif /* UNIV_DEBUG_SCRUBBING */ - if (heap) { - mem_heap_free(heap); - } - } - -#ifdef UNIV_DEBUG_SCRUBBING - if (scrub == false) { - fprintf(stderr, - "btr_page_free_low %lu/%lu blob: %u\n", - buf_block_get_space(block), - buf_block_get_page_no(block), - blob); - } -#endif /* UNIV_DEBUG_SCRUBBING */ - if (dict_index_is_ibuf(index)) { - btr_page_free_for_ibuf(index, block, mtr); - return; } - root = btr_root_get(index, mtr); - - if (level == 0 || level == ULINT_UNDEFINED) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - } else { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - } - -#ifdef UNIV_GIS_DEBUG - if (dict_index_is_spatial(index)) { - fprintf(stderr, "GIS_DIAG: Freed %ld\n", - (long) block->page.id.page_no()); - } -#endif - - if (scrub) { - /** - * Reset page type so that scrub thread won't try to scrub it - */ - mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr); - } - + /* TODO: Discard any operations for block from mtr->log. + The page will be freed, so previous changes to it by this + mini-transaction should not matter. */ + page_t* root = btr_root_get(index, mtr); + fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame) + ? PAGE_HEADER + PAGE_BTR_SEG_LEAF + : PAGE_HEADER + PAGE_BTR_SEG_TOP]; fseg_free_page(seg_header, block->page.id.space(), block->page.id.page_no(), - level != ULINT_UNDEFINED, mtr); + block->index != NULL, mtr); /* The page was marked free in the allocation bitmap, but it - should remain buffer-fixed until mtr_commit(mtr) or until it + should remain exclusively latched until mtr_t::commit() or until it is explicitly freed from the mini-transaction. */ ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); - /* TODO: Discard any operations on the page from the redo log - and remove the block from the flush list and the buffer pool. - This would free up buffer pool earlier and reduce writes to - both the tablespace and the redo log. */ -} -/**************************************************************//** -Frees a file page used in an index tree. NOTE: cannot free field external -storage pages because the page must contain info on its level. */ -void -btr_page_free( -/*==========*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - mtr_t* mtr) /*!< in: mtr */ -{ - const page_t* page = buf_block_get_frame(block); - ulint level = btr_page_get_level(page, mtr); - - ut_ad(fil_page_index_page_check(block->frame)); - ut_ad(level != ULINT_UNDEFINED); - btr_page_free_low(index, block, level, false, mtr); + if (srv_immediate_scrub_data_uncompressed) { + /* In MDEV-15528 this call must be removed, and we should + zero out the page after the redo log for this mini-transaction + has been durably written. */ + fsp_init_file_page(fil_space_get(index->space), block, mtr); + } } /**************************************************************//** diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 57cff1166da..998ff3f1e45 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -1037,7 +1037,7 @@ BtrBulk::finish(dberr_t err) root_page_bulk.copyIn(first_rec); /* Remove last page. */ - btr_page_free_low(m_index, last_block, m_root_level, false, &mtr); + btr_page_free(m_index, last_block, &mtr); /* Do not flush the last page. */ last_block->page.flush_observer = NULL; diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 36165a5b247..578183898b6 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -7484,8 +7484,7 @@ btr_free_externally_stored_field( } next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT); - btr_page_free_low(index, ext_block, 0, - true, &mtr); + btr_page_free(index, ext_block, &mtr, true); if (page_zip != NULL) { mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO, @@ -7511,12 +7510,7 @@ btr_free_externally_stored_field( next_page_no = mach_read_from_4( page + FIL_PAGE_DATA + BTR_BLOB_HDR_NEXT_PAGE_NO); - - /* We must supply the page level (= 0) as an argument - because we did not store it on the page (we save the - space overhead from an index page header. */ - btr_page_free_low(index, ext_block, 0, - true, &mtr); + btr_page_free(index, ext_block, &mtr, true); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, next_page_no, diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 2798df9f126..7ef7788e4ec 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -612,60 +612,34 @@ void fsp_apply_init_file_page(buf_block_t* block) #ifdef UNIV_DEBUG /** Assert that the mini-transaction is compatible with updating an allocation bitmap page. -@param[in] id tablespace identifier @param[in] mtr mini-transaction */ -static -void -fsp_space_modify_check( - const fil_space_t* space, - const mtr_t* mtr) +void fil_space_t::modify_check(const mtr_t& mtr) const { - switch (mtr->get_log_mode()) { + switch (mtr.get_log_mode()) { case MTR_LOG_SHORT_INSERTS: case MTR_LOG_NONE: /* These modes are only allowed within a non-bitmap page when there is a higher-level redo log record written. */ - ut_ad(space->purpose == FIL_TYPE_TABLESPACE - || space->purpose == FIL_TYPE_TEMPORARY); + ut_ad(purpose == FIL_TYPE_TABLESPACE + || purpose == FIL_TYPE_TEMPORARY); break; case MTR_LOG_NO_REDO: - ut_ad(space->purpose == FIL_TYPE_TEMPORARY - || space->purpose == FIL_TYPE_IMPORT - || space->redo_skipped_count - || space->is_being_truncated - || srv_is_tablespace_truncated(space->id)); + ut_ad(purpose == FIL_TYPE_TEMPORARY + || purpose == FIL_TYPE_IMPORT + || redo_skipped_count + || is_being_truncated + || srv_is_tablespace_truncated(id)); return; case MTR_LOG_ALL: - /* We may only write redo log for a persistent tablespace. */ - ut_ad(space->purpose == FIL_TYPE_TABLESPACE); - ut_ad(mtr->is_named_space(space->id)); + /* We may only write redo log for a persistent + tablespace. */ + ut_ad(purpose == FIL_TYPE_TABLESPACE); + ut_ad(mtr.is_named_space(id)); return; } - ut_ad(0); + ut_ad(!"invalid log mode"); } -#endif /* UNIV_DEBUG */ - -/** Initialize a file page. -@param[in,out] block file page -@param[in,out] mtr mini-transaction */ -static void fsp_init_file_page(buf_block_t* block, mtr_t* mtr) -{ - fsp_apply_init_file_page(block); - mlog_write_initial_log_record(block->frame, MLOG_INIT_FILE_PAGE2, mtr); -} - -#ifdef UNIV_DEBUG -static -void -fsp_init_file_page(const fil_space_t* space, buf_block_t* block, mtr_t* mtr) -{ - ut_d(fsp_space_modify_check(space, mtr)); - ut_ad(space->id == block->page.id.space()); - fsp_init_file_page(block, mtr); -} -#else /* UNIV_DEBUG */ -# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr) #endif /**********************************************************************//** @@ -816,7 +790,7 @@ fsp_header_inc_size( ut_ad(mtr); fil_space_t* space = mtr_x_lock_space(space_id, mtr); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); header = fsp_get_space_header( space, page_size_t(space->flags), mtr); @@ -877,7 +851,7 @@ fsp_try_extend_data_file_with_pages( ulint size; ut_a(!is_system_tablespace(space->id)); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); size = mach_read_from_4(header + FSP_SIZE); ut_ad(size == space->size_in_header); @@ -909,7 +883,7 @@ fsp_try_extend_data_file(fil_space_t* space, fsp_header_t* header, mtr_t* mtr) "ran out of space. Please add another file or use" " 'autoextend' for the last file in setting"; - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); if (space->id == TRX_SYS_SPACE && !srv_sys_space.can_auto_extend_last_file()) { @@ -1072,7 +1046,7 @@ fsp_fill_free_list( ulint i; ut_ad(page_offset(header) == FSP_HEADER_OFFSET); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); /* Check if we can fill free list from above the free list limit */ size = mach_read_from_4(header + FSP_SIZE); @@ -1395,7 +1369,7 @@ fsp_alloc_free_page( ulint free; const ulint space_id = space->id; - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); header = fsp_get_space_header(space, page_size, mtr); /* Get the hinted descriptor */ @@ -1501,7 +1475,7 @@ fsp_free_page( ulint frag_n_used; ut_ad(mtr); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */ @@ -1823,7 +1797,7 @@ fsp_free_seg_inode( page_t* page; fsp_header_t* space_header; - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); page = page_align(inode); @@ -2074,7 +2048,7 @@ fseg_create_general( fil_space_t* space = mtr_x_lock_space(space_id, mtr); const page_size_t page_size(space->flags); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); if (page != 0) { block = buf_page_get(page_id_t(space_id, page), page_size, @@ -2273,7 +2247,7 @@ fseg_fill_free_list( ut_ad(inode && mtr); ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); reserved = fseg_n_reserved_pages_low(inode, &used, mtr); @@ -2341,7 +2315,7 @@ fseg_alloc_free_extent( ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); if (flst_get_len(inode + FSEG_FREE) > 0) { /* Segment free list is not empty, allocate from it */ @@ -2428,7 +2402,7 @@ fseg_alloc_free_page_low( seg_id = mach_read_from_8(seg_inode + FSEG_ID); ut_ad(seg_id); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE); reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr); @@ -3032,7 +3006,7 @@ fseg_free_page_low( ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); #ifdef BTR_CUR_HASH_ADAPT /* Drop search system page hash index if the page is found in the pool and is hashed */ @@ -3228,7 +3202,7 @@ fseg_free_extent( ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - ut_d(fsp_space_modify_check(space, mtr)); + ut_d(space->modify_check(*mtr)); first_page_in_extent = page - (page % FSP_EXTENT_SIZE); diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 23e445e2d5f..54fd1bb6e45 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -671,16 +671,6 @@ btr_page_alloc( the page */ MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** -Frees a file page used in an index tree. NOTE: cannot free field external -storage pages because the page must contain info on its level. */ -void -btr_page_free( -/*==========*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** Creates a new index page (not the root, and also not used in page reorganization). @see btr_page_empty(). */ void @@ -691,18 +681,16 @@ btr_page_create( dict_index_t* index, /*!< in: index */ ulint level, /*!< in: the B-tree level of the page */ mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Frees a file page used in an index tree. Can be used also to BLOB -external storage pages. */ -void -btr_page_free_low( -/*==============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */ - bool blob, /*!< in: blob page */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull(1,2))); + +/** Free an index page. +@param[in,out] index index tree +@param[in,out] block block to be freed +@param[in,out] mtr mini-transaction +@param[in] blob whether this is freeing a BLOB page */ +MY_ATTRIBUTE((nonnull)) +void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, + bool blob = false); + /**************************************************************//** Gets the root node of a tree and x- or s-latches it. @return root page, x- or s-latched */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 30b2d205cf8..484f17565d4 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -108,7 +108,7 @@ struct fil_space_t { ulint redo_skipped_count; /*!< reference count for operations who want to skip redo log in the file space in order - to make fsp_space_modify_check pass. */ + to make modify_check() pass. */ #endif fil_type_t purpose;/*!< purpose */ UT_LIST_BASE_NODE_T(fil_node_t) chain; @@ -208,6 +208,12 @@ struct fil_space_t { fil_node_t* add(const char* name, pfs_os_file_t handle, ulint size, bool is_raw, bool atomic_write, ulint max_pages = ULINT_MAX); +#ifdef UNIV_DEBUG + /** Assert that the mini-transaction is compatible with + updating an allocation bitmap page. + @param[in] mtr mini-transaction */ + void modify_check(const mtr_t& mtr) const; +#endif /* UNIV_DEBUG */ }; /** Value of fil_space_t::magic_n */ diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 6f2fbff2174..bb495286805 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -688,6 +688,26 @@ fsp_descr_page( @param[in,out] block buffer pool block */ void fsp_apply_init_file_page(buf_block_t* block); +/** Initialize a file page. +@param[in] space tablespace +@param[in,out] block file page +@param[in,out] mtr mini-transaction */ +inline void fsp_init_file_page( +#ifdef UNIV_DEBUG + const fil_space_t* space, +#endif + buf_block_t* block, mtr_t* mtr) +{ + ut_d(space->modify_check(*mtr)); + ut_ad(space->id == block->page.id.space()); + fsp_apply_init_file_page(block); + mlog_write_initial_log_record(block->frame, MLOG_INIT_FILE_PAGE2, mtr); +} + +#ifndef UNIV_DEBUG +# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr) +#endif + #ifdef UNIV_BTR_PRINT /*******************************************************************//** Writes info of a segment. */ From caa8c20abee29416c82031a8ae2f7dd23d821b10 Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Mon, 8 Apr 2019 15:08:04 +0300 Subject: [PATCH 06/12] MDEV-14192 Mariabackup assertion failure: byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0 xtrabackup_backup_func(): If the log checkpoint header changed since we last read it, search for the most recent checkpoint again. Otherwise, we could corrupt the backup of the redo log, because the least significant bits of checkpoint_lsn_start would not match log_sys->log.lsn. --- extra/mariabackup/xtrabackup.cc | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index b8c87f09ec1..3eca4687bbd 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -4003,9 +4003,7 @@ static bool xtrabackup_backup_low() /** Implement --backup @return whether the operation succeeded */ -static -bool -xtrabackup_backup_func() +static bool xtrabackup_backup_func() { MY_STAT stat_info; uint i; @@ -4175,38 +4173,25 @@ fail: log_mutex_enter(); +reread_log_header: dberr_t err = recv_find_max_checkpoint(&max_cp_field); if (err != DB_SUCCESS) { -log_fail: + msg("Error: cannot read redo log header"); log_mutex_exit(); goto fail; } if (log_sys->log.format == 0) { -old_format: - msg("Error: cannot process redo log" - " before MariaDB 10.2.2"); + msg("Error: cannot process redo log before MariaDB 10.2.2"); log_mutex_exit(); - goto log_fail; + goto fail; } const byte* buf = log_sys->checkpoint_buf; - -reread_log_header: checkpoint_lsn_start = log_sys->log.lsn; checkpoint_no_start = log_sys->next_checkpoint_no; - err = recv_find_max_checkpoint(&max_cp_field); - - if (err != DB_SUCCESS) { - goto log_fail; - } - - if (log_sys->log.format == 0) { - goto old_format; - } - log_group_header_read(&log_sys->log, max_cp_field); if (checkpoint_no_start != mach_read_from_8(buf + LOG_CHECKPOINT_NO)) { @@ -4253,6 +4238,12 @@ reread_log_header: mach_write_to_8(log_hdr + LOG_CHECKPOINT_OFFSET, (checkpoint_lsn_start & (OS_FILE_LOG_BLOCK_SIZE - 1)) | LOG_FILE_HDR_SIZE); + /* The least significant bits of LOG_CHECKPOINT_OFFSET must be + stored correctly in the copy of the ib_logfile. The most significant + bits, which identify the start offset of the log block in the file, + we did choose freely, as LOG_FILE_HDR_SIZE. */ + ut_ad(!((log_sys->log.lsn ^ checkpoint_lsn_start) + & (OS_FILE_LOG_BLOCK_SIZE - 1))); log_block_set_checksum(log_hdr, log_block_calc_checksum_crc32(log_hdr)); /* Write checkpoint page 1 and two empty log pages before the From f120a15b93dcb88adc31ad4fc7caf63b813ca63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Apr 2019 15:36:03 +0300 Subject: [PATCH 07/12] MDEV-19212 4GB Limit on large_pages - integer overflow os_mem_alloc_large(): Invoke the macro ut_2pow_round() with the correct argument type. innobase_large_page_size, innobase_use_large_pages, os_use_large_pages, os_large_page_size: Remove. Simply refer to opt_large_page_size, my_use_large_pages. --- extra/mariabackup/xtrabackup.cc | 5 ----- storage/innobase/buf/buf0buf.cc | 2 +- storage/innobase/handler/ha_innodb.cc | 6 ------ storage/innobase/include/os0proc.h | 8 +------ storage/innobase/os/os0proc.cc | 30 +++++++++++++-------------- 5 files changed, 16 insertions(+), 35 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 3eca4687bbd..2b8abc04446 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -221,8 +221,6 @@ const char *defaults_group = "mysqld"; #define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ #define HA_INNOBASE_RANGE_COUNT 100 -ulong innobase_large_page_size = 0; - /* The default values for the following, type long or longlong, start-up parameters are declared in mysqld.cc: */ @@ -247,7 +245,6 @@ affects Windows: */ char* innobase_unix_file_flush_method; my_bool innobase_use_doublewrite; -my_bool innobase_use_large_pages; my_bool innobase_file_per_table; my_bool innobase_locks_unsafe_for_binlog; my_bool innobase_rollback_on_timeout; @@ -1919,8 +1916,6 @@ innodb_init_param(void) srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; - os_use_large_pages = (ibool) innobase_use_large_pages; - os_large_page_size = (ulint) innobase_large_page_size; row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout; srv_file_per_table = (my_bool) innobase_file_per_table; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index cd0ef0e1610..4703df3d33c 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1566,7 +1566,7 @@ buf_chunk_init( chunk->blocks = (buf_block_t*) chunk->mem; /* Align a pointer to the first frame. Note that when - os_large_page_size is smaller than UNIV_PAGE_SIZE, + opt_large_page_size is smaller than UNIV_PAGE_SIZE, we may allocate one fewer block than requested. When it is bigger, we may allocate more blocks than requested. */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index adcb845c031..8a97aeed0ba 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4265,12 +4265,6 @@ innobase_change_buffering_inited_ok: innodb_log_checksums = innodb_log_checksums_func_update( NULL, innodb_log_checksums); -#ifdef HAVE_LINUX_LARGE_PAGES - if ((os_use_large_pages = my_use_large_pages)) { - os_large_page_size = opt_large_page_size; - } -#endif - row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout; srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog; diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h index a73ba5a9e84..69d48c4d2f6 100644 --- a/storage/innobase/include/os0proc.h +++ b/storage/innobase/include/os0proc.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,12 +42,6 @@ typedef unsigned long int os_process_id_t; system with os_mem_alloc_large(). */ extern ulint os_total_large_mem_allocated; -/** Whether to use large pages in the buffer pool */ -extern my_bool os_use_large_pages; - -/** Large page size. This may be a boot-time option on some platforms */ -extern uint os_large_page_size; - /** Converts the current process id to a number. @return process id as a number */ ulint diff --git a/storage/innobase/os/os0proc.cc b/storage/innobase/os/os0proc.cc index 0c8f8795be8..685bcef25db 100644 --- a/storage/innobase/os/os0proc.cc +++ b/storage/innobase/os/os0proc.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,6 +26,9 @@ Created 9/30/1995 Heikki Tuuri *******************************************************/ #include "univ.i" +#ifdef HAVE_LINUX_LARGE_PAGES +# include "mysqld.h" +#endif /* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and MAP_ANON but MAP_ANON is marked as deprecated */ @@ -38,12 +42,6 @@ MAP_ANON but MAP_ANON is marked as deprecated */ system with os_mem_alloc_large(). */ ulint os_total_large_mem_allocated = 0; -/** Whether to use large pages in the buffer pool */ -my_bool os_use_large_pages; - -/** Large page size. This may be a boot-time option on some platforms */ -uint os_large_page_size; - /** Converts the current process id to a number. @return process id as a number */ ulint @@ -66,18 +64,18 @@ os_mem_alloc_large( { void* ptr; ulint size; -#if defined HAVE_LINUX_LARGE_PAGES && defined UNIV_LINUX +#ifdef HAVE_LINUX_LARGE_PAGES int shmid; struct shmid_ds buf; - if (!os_use_large_pages || !os_large_page_size) { + if (!my_use_large_pages || !opt_large_page_size) { goto skip; } - /* Align block size to os_large_page_size */ - ut_ad(ut_is_2pow(os_large_page_size)); - size = ut_2pow_round(*n + (os_large_page_size - 1), - os_large_page_size); + /* Align block size to opt_large_page_size */ + ut_ad(ut_is_2pow(opt_large_page_size)); + size = ut_2pow_round(*n + opt_large_page_size - 1, + ulint(opt_large_page_size)); shmid = shmget(IPC_PRIVATE, (size_t) size, SHM_HUGETLB | SHM_R | SHM_W); if (shmid < 0) { @@ -109,7 +107,7 @@ os_mem_alloc_large( ib::warn() << "Using conventional memory pool"; skip: -#endif /* HAVE_LINUX_LARGE_PAGES && UNIV_LINUX */ +#endif /* HAVE_LINUX_LARGE_PAGES */ #ifdef _WIN32 SYSTEM_INFO system_info; @@ -161,13 +159,13 @@ os_mem_free_large( { ut_a(os_total_large_mem_allocated >= size); -#if defined HAVE_LINUX_LARGE_PAGES && defined UNIV_LINUX - if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) { +#ifdef HAVE_LINUX_LARGE_PAGES + if (my_use_large_pages && opt_large_page_size && !shmdt(ptr)) { my_atomic_addlint( &os_total_large_mem_allocated, -size); return; } -#endif /* HAVE_LINUX_LARGE_PAGES && UNIV_LINUX */ +#endif /* HAVE_LINUX_LARGE_PAGES */ #ifdef _WIN32 /* When RELEASE memory, the size parameter must be 0. Do not use MEM_RELEASE with MEM_DECOMMIT. */ From e7f426d2c9be3ebc7c373e80873996ce8857ab48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Apr 2019 15:55:09 +0300 Subject: [PATCH 08/12] MDEV-19212: Replace macros with type-safe inline functions The regression that was reported in MDEV-19212 occurred due to use of macros that did not ensure that the arguments have compatible types. ut_2pow_remainder(), ut_2pow_round(), ut_calc_align(): Define as inline function templates. UT_CALC_ALIGN(): Define as a macro, because this is used in compile_time_assert(). Only starting with C++11 (MariaDB 10.4) we could define the inline functions as constexpr. --- storage/innobase/fsp/fsp0fsp.cc | 2 +- storage/innobase/include/mem0mem.h | 8 ++++---- storage/innobase/include/ut0ut.h | 14 ++++++-------- storage/innobase/log/log0log.cc | 20 +++++++++----------- storage/innobase/os/os0proc.cc | 6 ++---- 5 files changed, 22 insertions(+), 28 deletions(-) diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 7ef7788e4ec..ac48747513b 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -955,7 +955,7 @@ fsp_try_extend_data_file(fil_space_t* space, fsp_header_t* header, mtr_t* mtr) /* We ignore any fragments of a full megabyte when storing the size to the space header */ - space->size_in_header = ut_calc_align_down( + space->size_in_header = ut_2pow_round( space->size, (1024 * 1024) / page_size.physical()); mlog_write_ulint( diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h index 17a388ab68c..658919f27c7 100644 --- a/storage/innobase/include/mem0mem.h +++ b/storage/innobase/include/mem0mem.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2018, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -77,7 +77,7 @@ is the maximum size for a single allocated buffer: */ /** Space needed when allocating for a user a field of length N. The space is allocated only in multiples of UNIV_MEM_ALIGNMENT. */ -#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT) +#define MEM_SPACE_NEEDED(N) UT_CALC_ALIGN((N), UNIV_MEM_ALIGNMENT) #ifdef UNIV_DEBUG /** Macro for memory heap creation. @@ -342,8 +342,8 @@ struct mem_block_info_t { #define MEM_FREED_BLOCK_MAGIC_N 547711122 /* Header size for a memory heap block */ -#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\ - UNIV_MEM_ALIGNMENT) +#define MEM_BLOCK_HEADER_SIZE UT_CALC_ALIGN(sizeof(mem_block_info_t),\ + UNIV_MEM_ALIGNMENT) #include "mem0mem.ic" #endif diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index 4e9c2599933..81f4a9b8b23 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -165,26 +166,23 @@ Calculates fast the remainder of n/m when m is a power of two. @param n in: numerator @param m in: denominator, must be a power of two @return the remainder of n/m */ -#define ut_2pow_remainder(n, m) ((n) & ((m) - 1)) +template inline T ut_2pow_remainder(T n, T m){return n & (m - 1);} /*************************************************************//** Calculates the biggest multiple of m that is not bigger than n when m is a power of two. In other words, rounds n down to m * k. @param n in: number to round down @param m in: alignment, must be a power of two @return n rounded down to the biggest possible integer multiple of m */ -#define ut_2pow_round(n, m) ((n) & ~((m) - 1)) -/** Align a number down to a multiple of a power of two. -@param n in: number to round down -@param m in: alignment, must be a power of two -@return n rounded down to the biggest possible integer multiple of m */ -#define ut_calc_align_down(n, m) ut_2pow_round(n, m) +template inline T ut_2pow_round(T n, T m) { return n & ~(m - 1); } /********************************************************//** Calculates the smallest multiple of m that is not smaller than n when m is a power of two. In other words, rounds n up to m * k. @param n in: number to round up @param m in: alignment, must be a power of two @return n rounded up to the smallest possible integer multiple of m */ -#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1)) +#define UT_CALC_ALIGN(n, m) ((n + m - 1) & ~(m - 1)) +template inline T ut_calc_align(T n, T m) +{ return UT_CALC_ALIGN(n, m); } /*************************************************************//** Calculates fast the 2-logarithm of a number, rounded upward to an diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 516d92164c4..3a54af8e3c2 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -203,10 +203,8 @@ log_buffer_extend( log_sys->is_extending = true; - while (ut_calc_align_down(log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE) - != ut_calc_align_down(log_sys->buf_next_to_write, - OS_FILE_LOG_BLOCK_SIZE)) { + while ((log_sys->buf_free ^ log_sys->buf_next_to_write) + & (OS_FILE_LOG_BLOCK_SIZE - 1)) { /* Buffer might have >1 blocks to write still. */ log_mutex_exit_all(); @@ -215,9 +213,8 @@ log_buffer_extend( log_mutex_enter_all(); } - move_start = ut_calc_align_down( - log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE); + move_start = ut_2pow_round(log_sys->buf_free, + ulint(OS_FILE_LOG_BLOCK_SIZE)); move_end = log_sys->buf_free; /* store the last log block in buffer */ @@ -1079,8 +1076,8 @@ log_buffer_switch() ut_ad(log_write_mutex_own()); const byte* old_buf = log_sys->buf; - ulint area_end = ut_calc_align(log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE); + ulint area_end = ut_calc_align( + log_sys->buf_free, ulint(OS_FILE_LOG_BLOCK_SIZE)); if (log_sys->first_in_use) { log_sys->first_in_use = false; @@ -1215,8 +1212,9 @@ loop: start_offset = log_sys->buf_next_to_write; end_offset = log_sys->buf_free; - area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE); - area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE); + area_start = ut_2pow_round(start_offset, + ulint(OS_FILE_LOG_BLOCK_SIZE)); + area_end = ut_calc_align(end_offset, ulint(OS_FILE_LOG_BLOCK_SIZE)); ut_ad(area_end - area_start > 0); diff --git a/storage/innobase/os/os0proc.cc b/storage/innobase/os/os0proc.cc index 685bcef25db..b3202992af0 100644 --- a/storage/innobase/os/os0proc.cc +++ b/storage/innobase/os/os0proc.cc @@ -115,10 +115,8 @@ skip: /* Align block size to system page size */ ut_ad(ut_is_2pow(system_info.dwPageSize)); - /* system_info.dwPageSize is only 32-bit. Casting to ulint is required - on 64-bit Windows. */ - size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1), - (ulint) system_info.dwPageSize); + size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1), + system_info.dwPageSize); ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); if (!ptr) { From 7362f115543d34b1d3f06e61be9235b30c0abc8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Apr 2019 17:06:06 +0300 Subject: [PATCH 09/12] Require --big-test for innodb.undo_truncate_recover --- mysql-test/suite/innodb/t/undo_truncate_recover.test | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mysql-test/suite/innodb/t/undo_truncate_recover.test b/mysql-test/suite/innodb/t/undo_truncate_recover.test index e6f8afb7857..c28a2154f92 100644 --- a/mysql-test/suite/innodb/t/undo_truncate_recover.test +++ b/mysql-test/suite/innodb/t/undo_truncate_recover.test @@ -2,18 +2,14 @@ # WL#6965: Truncate UNDO logs. # +--source include/big_test.inc # With larger innodb_page_size, the undo log tablespaces do not grow enough. --source include/innodb_page_size_small.inc --source include/have_innodb.inc --source include/have_debug.inc --source include/have_undo_tablespaces.inc - -# Valgrind would complain about memory leaks when we crash on purpose. ---source include/not_valgrind.inc -# Embedded server does not support crashing +# Tests with embedded server do not support restarting --source include/not_embedded.inc -# Avoid CrashReporter popup on Mac ---source include/not_crashrep.inc SET GLOBAL innodb_undo_logs = 4; SET GLOBAL innodb_undo_log_truncate = 1; From 05b84b2568ea62a3af69dc50ec346a22c0ac96c6 Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Mon, 8 Apr 2019 18:25:43 +0300 Subject: [PATCH 10/12] MDEV-14192: Add debug assertions --- extra/mariabackup/xtrabackup.cc | 4 ++-- storage/innobase/include/log0log.h | 25 +++++++++++++++++++++---- storage/innobase/log/log0recv.cc | 25 +++++++++++++------------ 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 6b9a466d443..e70295417b7 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -4129,7 +4129,7 @@ reread_log_header: } const byte* buf = log_sys.checkpoint_buf; - checkpoint_lsn_start = log_sys.log.lsn; + checkpoint_lsn_start = log_sys.log.get_lsn(); checkpoint_no_start = log_sys.next_checkpoint_no; log_header_read(max_cp_field); @@ -4181,7 +4181,7 @@ reread_log_header: stored correctly in the copy of the ib_logfile. The most significant bits, which identify the start offset of the log block in the file, we did choose freely, as LOG_FILE_HDR_SIZE. */ - ut_ad(!((log_sys.log.lsn ^ checkpoint_lsn_start) + ut_ad(!((log_sys.log.get_lsn() ^ checkpoint_lsn_start) & (OS_FILE_LOG_BLOCK_SIZE - 1))); log_block_set_checksum(log_hdr, log_block_calc_checksum_crc32(log_hdr)); diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 061876022b6..3ea211ed545 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -2,7 +2,7 @@ Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2009, Google Inc. -Copyright (c) 2017, 2018, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -563,11 +563,12 @@ struct log_t{ uint32_t subformat; /** individual log file size in bytes, including the header */ lsn_t file_size; + private: /** lsn used to fix coordinates within the log group */ lsn_t lsn; /** the byte offset of the above lsn */ lsn_t lsn_offset; - + public: /** used only in recovery: recovery scan succeeded up to this lsn in this log group */ lsn_t scanned_lsn; @@ -584,8 +585,9 @@ struct log_t{ /** Set the field values to correspond to a given lsn. */ void set_fields(lsn_t lsn) { - lsn_offset = calc_lsn_offset(lsn); - this->lsn = lsn; + lsn_t c_lsn_offset = calc_lsn_offset(lsn); + set_lsn(lsn); + set_lsn_offset(c_lsn_offset); } /** Read a log segment to log_sys.buf. @@ -604,6 +606,10 @@ struct log_t{ { n_files = 0; } + void set_lsn(lsn_t a_lsn); + lsn_t get_lsn() const { return lsn; } + void set_lsn_offset(lsn_t a_lsn); + lsn_t get_lsn_offset() const { return lsn_offset; } } log; /** The fields involved in the log buffer flush @{ */ @@ -741,6 +747,17 @@ inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE)); } +inline void log_t::files::set_lsn(lsn_t a_lsn) { + ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned()); + lsn = a_lsn; +} + +inline void log_t::files::set_lsn_offset(lsn_t a_lsn) { + ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned()); + ut_ad((lsn % OS_FILE_LOG_BLOCK_SIZE) == (a_lsn % OS_FILE_LOG_BLOCK_SIZE)); + lsn_offset = a_lsn; +} + /** Test if flush order mutex is owned. */ #define log_flush_order_mutex_own() \ mutex_own(&log_sys.log_flush_order_mutex) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 60732ed8fef..9930fdeae03 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -990,11 +990,12 @@ recv_find_max_checkpoint_0(ulint* max_field) *max_field = field; max_no = checkpoint_no; - log_sys.log.lsn = mach_read_from_8( - buf + LOG_CHECKPOINT_LSN); - log_sys.log.lsn_offset = static_cast( - mach_read_from_4(buf + OFFSET_HIGH32)) << 32 - | mach_read_from_4(buf + OFFSET_LOW32); + log_sys.log.set_lsn(mach_read_from_8( + buf + LOG_CHECKPOINT_LSN)); + log_sys.log.set_lsn_offset( + lsn_t(mach_read_from_4(buf + OFFSET_HIGH32)) + << 32 + | mach_read_from_4(buf + OFFSET_LOW32)); } } @@ -1076,7 +1077,7 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt) static dberr_t recv_log_recover_10_4() { ut_ad(!log_sys.is_encrypted()); - const lsn_t lsn = log_sys.log.lsn; + const lsn_t lsn = log_sys.log.get_lsn(); const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn); const ulint page_no = (ulint) (source_offset / univ_page_size.physical()); @@ -1202,10 +1203,10 @@ recv_find_max_checkpoint(ulint* max_field) if (checkpoint_no >= max_no) { *max_field = field; max_no = checkpoint_no; - log_sys.log.lsn = mach_read_from_8( - buf + LOG_CHECKPOINT_LSN); - log_sys.log.lsn_offset = mach_read_from_8( - buf + LOG_CHECKPOINT_OFFSET); + log_sys.log.set_lsn(mach_read_from_8( + buf + LOG_CHECKPOINT_LSN)); + log_sys.log.set_lsn_offset(mach_read_from_8( + buf + LOG_CHECKPOINT_OFFSET)); log_sys.next_checkpoint_no = checkpoint_no; } } @@ -3713,8 +3714,8 @@ recv_reset_logs( log_sys.lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); - log_sys.log.lsn = log_sys.lsn; - log_sys.log.lsn_offset = LOG_FILE_HDR_SIZE; + log_sys.log.set_lsn(log_sys.lsn); + log_sys.log.set_lsn_offset(LOG_FILE_HDR_SIZE); log_sys.buf_next_to_write = 0; log_sys.write_lsn = log_sys.lsn; From ee7a4f4462e312d7aca81eb96e81224905741acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Apr 2019 17:13:37 +0300 Subject: [PATCH 11/12] MDEV-12266: Pass fil_space_t* to fseg_free_page() fseg_free_page_func(): Avoid an unnecessary tablespace ID lookup. The callers should pass the tablespace that they already know. --- storage/innobase/btr/btr0btr.cc | 3 +-- storage/innobase/fsp/fsp0fsp.cc | 34 +++++++++++++++++------------- storage/innobase/ibuf/ibuf0ibuf.cc | 3 ++- storage/innobase/include/fsp0fsp.h | 28 +++++++++++++----------- storage/innobase/trx/trx0undo.cc | 31 ++++++++++++--------------- 5 files changed, 51 insertions(+), 48 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 67b5af6c63d..a0a75fae1cf 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -750,8 +750,7 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, ? PAGE_HEADER + PAGE_BTR_SEG_LEAF : PAGE_HEADER + PAGE_BTR_SEG_TOP]; fseg_free_page(seg_header, - block->page.id.space(), - block->page.id.page_no(), + index->table->space, block->page.id.page_no(), block->index != NULL, mtr); /* The page was marked free in the allocation bitmap, but it diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 30473dd563e..2b22cd253eb 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1360,7 +1360,7 @@ fsp_alloc_free_page( /** Frees a single page of a space. The page is marked as free and clean. @param[in,out] space tablespace -@param[in] page_id page id +@param[in] offset page number @param[in] page_size page size @param[in,out] mtr mini-transaction */ static @@ -2923,35 +2923,39 @@ fseg_free_page_low( fseg_free_page_low(inode, space, offset, page_size, mtr) #endif /* !BTR_CUR_HASH_ADAPT */ -/**********************************************************************//** -Frees a single page of a segment. */ +/** Free a page in a file segment. +@param[in,out] seg_header file segment header +@param[in,out] space tablespace +@param[in] offset page number +@param[in] ahi whether we may need to drop the adaptive +hash index +@param[in,out] mtr mini-transaction */ void fseg_free_page_func( - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space_id,/*!< in: space id */ - ulint page, /*!< in: page offset */ + fseg_header_t* seg_header, + fil_space_t* space, + ulint offset, #ifdef BTR_CUR_HASH_ADAPT - bool ahi, /*!< in: whether we may need to drop - the adaptive hash index */ + bool ahi, #endif /* BTR_CUR_HASH_ADAPT */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + mtr_t* mtr) { DBUG_ENTER("fseg_free_page"); fseg_inode_t* seg_inode; buf_block_t* iblock; - fil_space_t* space = mtr_x_lock_space(space_id, mtr); + mtr_x_lock(&space->latch, mtr); const page_size_t page_size(space->flags); - DBUG_LOG("fseg_free_page", "space_id: " << space_id - << ", page_no: " << page); + DBUG_LOG("fseg_free_page", "space_id: " << space->id + << ", page_no: " << offset); - seg_inode = fseg_inode_get(seg_header, space_id, page_size, mtr, + seg_inode = fseg_inode_get(seg_header, space->id, page_size, mtr, &iblock); fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr); - fseg_free_page_low(seg_inode, space, page, page_size, ahi, mtr); + fseg_free_page_low(seg_inode, space, offset, page_size, ahi, mtr); - ut_d(buf_page_set_file_page_was_freed(page_id_t(space_id, page))); + ut_d(buf_page_set_file_page_was_freed(page_id_t(space->id, offset))); DBUG_VOID_RETURN; } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 261fbb05c63..e3c61ca29cc 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -2139,8 +2139,9 @@ ibuf_remove_free_page(void) the free list was so long that they cannot have taken the last page from it. */ + compile_time_assert(IBUF_SPACE_ID == 0); fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - IBUF_SPACE_ID, page_no, false, &mtr); + fil_system.sys_space, page_no, false, &mtr); const page_id_t page_id(IBUF_SPACE_ID, page_no); diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index b8952a5ee17..5709a0bb11b 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -508,24 +508,28 @@ fsp_reserve_free_extents( mtr_t* mtr, ulint n_pages = 2); -/**********************************************************************//** -Frees a single page of a segment. */ +/** Free a page in a file segment. +@param[in,out] seg_header file segment header +@param[in,out] space tablespace +@param[in] offset page number +@param[in] ahi whether we may need to drop the adaptive +hash index +@param[in,out] mtr mini-transaction */ void fseg_free_page_func( - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space_id, /*!< in: space id */ - ulint page, /*!< in: page offset */ + fseg_header_t* seg_header, + fil_space_t* space, + ulint offset, #ifdef BTR_CUR_HASH_ADAPT - bool ahi, /*!< in: whether we may need to drop - the adaptive hash index */ + bool ahi, #endif /* BTR_CUR_HASH_ADAPT */ - mtr_t* mtr); /*!< in/out: mini-transaction */ + mtr_t* mtr); #ifdef BTR_CUR_HASH_ADAPT -# define fseg_free_page(header, space_id, page, ahi, mtr) \ - fseg_free_page_func(header, space_id, page, ahi, mtr) +# define fseg_free_page(header, space, offset, ahi, mtr) \ + fseg_free_page_func(header, space, offset, ahi, mtr) #else /* BTR_CUR_HASH_ADAPT */ -# define fseg_free_page(header, space_id, page, ahi, mtr) \ - fseg_free_page_func(header, space_id, page, mtr) +# define fseg_free_page(header, space, offset, ahi, mtr) \ + fseg_free_page_func(header, space, offset, mtr) #endif /* BTR_CUR_HASH_ADAPT */ /** Determine whether a page is free. @param[in,out] space tablespace diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 354d3c8d848..45088b688ed 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -831,35 +831,30 @@ trx_undo_free_page( undo log page; the caller must have reserved the rollback segment mutex */ { - page_t* header_page; - page_t* undo_page; - fil_addr_t last_addr; - trx_rsegf_t* rseg_header; - ulint hist_size; const ulint space = rseg->space->id; ut_a(hdr_page_no != page_no); ut_ad(mutex_own(&(rseg->mutex))); - undo_page = trx_undo_page_get(page_id_t(space, page_no), mtr); + page_t* undo_page = trx_undo_page_get(page_id_t(space, page_no), mtr); + page_t* header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), + mtr); - header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), mtr); + flst_remove(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_page, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + undo_page, mtr); - flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); + fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + header_page, + rseg->space, page_no, false, mtr); - fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, - space, page_no, false, mtr); - - last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR - + TRX_UNDO_PAGE_LIST, mtr); + const fil_addr_t last_addr = flst_get_last( + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_page, mtr); rseg->curr_size--; if (in_history) { - rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); - - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); + trx_rsegf_t* rseg_header = trx_rsegf_get( + rseg->space, rseg->page_no, mtr); + uint32_t hist_size = mach_read_from_4( + rseg_header + TRX_RSEG_HISTORY_SIZE); ut_ad(hist_size > 0); mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, hist_size - 1, MLOG_4BYTES, mtr); From 937ec3c48d2068dfb76d47cb409eb19d38677da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Apr 2019 21:53:30 +0300 Subject: [PATCH 12/12] MDEV-19212: After-merge fix for sizeof(ulong)!=sizeof(ulint) --- storage/innobase/buf/buf0buf.cc | 10 +++++----- storage/innobase/log/log0log.cc | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 81eeba2b1ad..b709fe643fa 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1568,12 +1568,12 @@ buf_chunk_init( /* Round down to a multiple of page size, although it already should be. */ - mem_size = ut_2pow_round(mem_size, ulint(srv_page_size)); + mem_size = ut_2pow_round(mem_size, srv_page_size); /* Reserve space for the block descriptors. */ - mem_size += ut_2pow_round((mem_size >> srv_page_size_shift) - * (sizeof *block) - + (srv_page_size - 1), - ulint(srv_page_size)); + mem_size += ut_2pow_round((mem_size >> srv_page_size_shift) + * (sizeof *block) + + (srv_page_size - 1), + srv_page_size); DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL);); diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 8a80d2bbfd4..0ffbe7bf327 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -897,8 +897,8 @@ log_buffer_switch() ut_ad(log_write_mutex_own()); const byte* old_buf = log_sys.buf; - ulint area_end = ut_calc_align( - log_sys.buf_free, ulint(OS_FILE_LOG_BLOCK_SIZE)); + ulong area_end = ut_calc_align( + log_sys.buf_free, ulong(OS_FILE_LOG_BLOCK_SIZE)); if (log_sys.first_in_use) { log_sys.first_in_use = false;