diff --git a/mysql-test/suite/innodb_zip/r/innochecksum_3.result b/mysql-test/suite/innodb_zip/r/innochecksum_3.result index 946c86b2a09..cfe1e4852ba 100644 --- a/mysql-test/suite/innodb_zip/r/innochecksum_3.result +++ b/mysql-test/suite/innodb_zip/r/innochecksum_3.result @@ -172,7 +172,7 @@ Filename::tab#.ibd #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - -#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#::# | Freshly allocated page | - #::# | Freshly allocated page | - # Variables used by page type dump for ibdata1 @@ -207,7 +207,7 @@ Filename::tab#.ibd #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - -#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#::# | Freshly allocated page | - #::# | Freshly allocated page | - [6]: check the valid lower bound values for option # allow-mismatches,page,start-page,end-page diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 5522ba54b96..05294568a6e 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -962,7 +962,8 @@ fail: } ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(!block->page.file_page_was_freed); + DBUG_ASSERT(block->page.status != buf_page_t::FREED); + buf_page_set_accessed(&block->page); buf_block_buf_fix_inc(block, __FILE__, __LINE__); mutex_exit(&block->mutex); @@ -1313,7 +1314,7 @@ void btr_search_drop_page_hash_when_freed(const page_id_t page_id) /* If AHI is still valid, page can't be in free state. AHI is dropped when page is freed. 
*/ - ut_ad(!block->page.file_page_was_freed); + DBUG_ASSERT(block->page.status != buf_page_t::FREED); buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 24aef3fed5e..7ae510a8851 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1355,13 +1355,11 @@ buf_block_init(buf_block_t* block, byte* frame) block->page.state = BUF_BLOCK_NOT_USED; block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; - block->page.init_on_flush = false; block->page.real_size = 0; block->page.write_size = 0; block->modify_clock = 0; block->page.slot = NULL; - - ut_d(block->page.file_page_was_freed = FALSE); + block->page.status = buf_page_t::NORMAL; #ifdef BTR_CUR_HASH_ADAPT block->index = NULL; @@ -3211,59 +3209,65 @@ void buf_page_make_young(buf_page_t* bpage) mutex_exit(&buf_pool->mutex); } +/** Mark the page status as FREED for the given tablespace id and +page number. If the page is not in the buffer pool then ignore it. +X-lock should be taken on the page before marking the page status +as FREED. It avoids the concurrent flushing of freed page. +Currently, this function only marks the page as FREED if it is +in buffer pool. +@param[in] page_id page id +@param[in,out] mtr mini-transaction +@param[in] file file name +@param[in] line line where called */ +void buf_page_free(const page_id_t page_id, + mtr_t *mtr, + const char *file, + unsigned line) +{ + ut_ad(mtr); + ut_ad(mtr->is_active()); + buf_pool->stat.n_page_gets++; + rw_lock_t *hash_lock= buf_page_hash_lock_get(page_id); + rw_lock_s_lock(hash_lock); + + /* page_hash can be changed. 
*/ + hash_lock= buf_page_hash_lock_s_confirm(hash_lock, page_id); + buf_block_t *block= reinterpret_cast + (buf_page_hash_get_low(page_id)); + + if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) + { + /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, + but avoid buf_zip_decompress() */ + /* FIXME: If block==NULL, introduce a separate data structure + to cover freed page ranges to augment buf_flush_freed_page() */ + rw_lock_s_unlock(hash_lock); + return; + } + + block->fix(); + mutex_enter(&block->mutex); + /* Now safe to release page_hash mutex */ + rw_lock_s_unlock(hash_lock); + ut_ad(block->page.buf_fix_count > 0); + #ifdef UNIV_DEBUG -/** Sets file_page_was_freed TRUE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. -@param[in] page_id page id -@return control block if found in page hash table, otherwise NULL */ -buf_page_t* buf_page_set_file_page_was_freed(const page_id_t page_id) -{ - buf_page_t* bpage; - rw_lock_t* hash_lock; - - bpage = buf_page_hash_get_s_locked(page_id, &hash_lock); - - if (bpage) { - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - ut_ad(!buf_pool_watch_is_sentinel(bpage)); - mutex_enter(block_mutex); - rw_lock_s_unlock(hash_lock); - /* bpage->file_page_was_freed can already hold - when this code is invoked from dict_drop_index_tree() */ - bpage->file_page_was_freed = TRUE; - mutex_exit(block_mutex); - } - - return(bpage); -} - -/** Sets file_page_was_freed FALSE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. 
-@param[in] page_id page id -@return control block if found in page hash table, otherwise NULL */ -buf_page_t* buf_page_reset_file_page_was_freed(const page_id_t page_id) -{ - buf_page_t* bpage; - rw_lock_t* hash_lock; - - bpage = buf_page_hash_get_s_locked(page_id, &hash_lock); - if (bpage) { - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - ut_ad(!buf_pool_watch_is_sentinel(bpage)); - mutex_enter(block_mutex); - rw_lock_s_unlock(hash_lock); - bpage->file_page_was_freed = FALSE; - mutex_exit(block_mutex); - } - - return(bpage); -} + if (!fsp_is_system_temporary(page_id.space())) + { + ibool ret= rw_lock_s_lock_nowait(block->debug_latch, file, line); + ut_a(ret); + } #endif /* UNIV_DEBUG */ + mtr_memo_type_t fix_type= MTR_MEMO_PAGE_X_FIX; + rw_lock_x_lock_inline(&block->lock, 0, file, line); + mtr_memo_push(mtr, block, fix_type); + + block->page.status= buf_page_t::FREED; + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + mutex_exit(&block->mutex); +} + /** Attempts to discard the uncompressed frame of a compressed page. The caller should not be holding any mutexes when this function is called. @param[in] page_id page id */ @@ -3382,7 +3386,7 @@ got_block: rw_lock_s_unlock(hash_lock); - ut_ad(!bpage->file_page_was_freed); + DBUG_ASSERT(bpage->status != buf_page_t::FREED); buf_page_set_accessed(bpage); @@ -4282,7 +4286,7 @@ evict_from_pool: "btr_search_drop_page_hash_when_freed". 
*/ ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL - || !fix_block->page.file_page_was_freed); + || fix_block->page.status != buf_page_t::FREED); /* Check if this is the first access to the page */ access_time = buf_page_is_accessed(&fix_block->page); @@ -4472,10 +4476,6 @@ buf_page_optimistic_get( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - ut_d(buf_page_mutex_enter(block)); - ut_ad(!block->page.file_page_was_freed); - ut_d(buf_page_mutex_exit(block)); - if (!access_time) { /* In the case of a first access, try to apply linear read-ahead */ @@ -4558,10 +4558,6 @@ buf_page_try_get_func( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - ut_d(buf_page_mutex_enter(block)); - ut_d(ut_a(!block->page.file_page_was_freed)); - ut_d(buf_page_mutex_exit(block)); - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); buf_pool->stat.n_page_gets++; @@ -4588,10 +4584,8 @@ buf_page_init_low( bpage->real_size = 0; bpage->slot = NULL; bpage->ibuf_exist = false; - + bpage->status = buf_page_t::NORMAL; HASH_INVALIDATE(bpage, hash); - - ut_d(bpage->file_page_was_freed = FALSE); } /** Inits a page to the buffer buf_pool. 
@@ -4844,7 +4838,7 @@ buf_page_init_for_read( bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->id = page_id; - bpage->init_on_flush = false; + bpage->status = buf_page_t::NORMAL; ut_d(bpage->in_page_hash = FALSE); ut_d(bpage->in_zip_hash = FALSE); @@ -4936,8 +4930,6 @@ buf_page_create( if (block && buf_page_in_file(&block->page) && !buf_pool_watch_is_sentinel(&block->page)) { - ut_d(block->page.file_page_was_freed = FALSE); - /* Page can be found in buf_pool */ mutex_exit(&buf_pool->mutex); rw_lock_x_unlock(hash_lock); @@ -4945,8 +4937,13 @@ buf_page_create( buf_block_free(free_block); if (!recv_recovery_is_on()) { - return buf_page_get_with_no_latch(page_id, zip_size, - mtr); + /* FIXME: Remove the redundant lookup and avoid + the unnecessary invocation of buf_zip_decompress(). + We may have to convert buf_page_t to buf_block_t, + but we are going to initialize the page. */ + return buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, + block, BUF_GET_POSSIBLY_FREED, + __FILE__, __LINE__, mtr); } mutex_exit(&recv_sys.mutex); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 25584e5e6e2..8a7dd5578ad 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -897,6 +897,10 @@ a page is written to disk. (may be src_frame or an encrypted/compressed copy of it) */ static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s) { + if (bpage->status == buf_page_t::FREED) { + return s; + } + ut_ad(space->id == bpage->id.space()); bpage->real_size = srv_page_size; @@ -1022,6 +1026,62 @@ not_compressed: return d; } +/** The following function deals with freed page during flushing. + i) Writing zeros to the file asynchronously if scrubbing is enabled + ii) Punch the hole to the file synchronously if page_compressed is + enabled for the tablespace +This function also resets the IO_FIX to IO_NONE and sets the +page status to NORMAL.
It initiates the write to the file only after +releasing the page from flush list and its associated mutex. +@param[in,out] bpage freed buffer page +@param[in] space tablespace object of the freed page */ +static void buf_flush_freed_page(buf_page_t* bpage, fil_space_t* space) +{ + const page_id_t page_id(bpage->id.space(), bpage->id.page_no()); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); + const bool uncompressed = (buf_page_get_state(bpage) + == BUF_BLOCK_FILE_PAGE); + bool punch_hole = false; + + mutex_enter(&buf_pool->mutex); + mutex_enter(block_mutex); + + buf_page_set_io_fix(bpage, BUF_IO_NONE); + bpage->status = buf_page_t::NORMAL; + buf_flush_write_complete(bpage, false); + + if (uncompressed) { + rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock, + BUF_IO_WRITE); + } + + buf_pool->stat.n_pages_written++; + mutex_exit(block_mutex); + mutex_exit(&buf_pool->mutex); + + if (space->is_compressed()) { +#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) + punch_hole = (space != fil_system.temp_space + && space->is_compressed()); + +#endif + } + + if (srv_immediate_scrub_data_uncompressed || punch_hole) { + /* Zero write the page */ + ulint type = IORequest::WRITE; + IORequest request(type, NULL); + page_t* frame = const_cast(field_ref_zero); + + fil_io(request, punch_hole ? true :false, + page_id, space->zip_size(), 0, + space->physical_size(), frame, NULL, + false, punch_hole); + } + + space->release_for_io(); +} + /********************************************************************//** Does an asynchronous write of a buffer page. 
NOTE: when the doublewrite buffer is used, we must call @@ -1084,6 +1144,12 @@ buf_flush_write_block_low( frame = ((buf_block_t*) bpage)->frame; } + /* Skip the encryption and compression for the + freed page */ + if (bpage->status == buf_page_t::FREED) { + break; + } + byte* page = reinterpret_cast(bpage)->frame; if (full_crc32) { @@ -1111,8 +1177,13 @@ buf_flush_write_block_low( ut_ad(space->atomic_write_supported); } - const bool use_doublewrite = !bpage->init_on_flush - && space->use_doublewrite(); + if (bpage->status == buf_page_t::FREED) { + buf_flush_freed_page(bpage, space); + return; + } + + const bool use_doublewrite = bpage->status != buf_page_t::INIT_ON_FLUSH + && space->use_doublewrite(); if (!use_doublewrite) { ulint type = IORequest::WRITE; @@ -1191,17 +1262,14 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync) ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); - bool is_uncompressed; - - is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + bool is_uncompressed = (buf_page_get_state(bpage) + == BUF_BLOCK_FILE_PAGE); ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); - ibool flush; rw_lock_t* rw_lock; bool no_fix_count = bpage->buf_fix_count == 0; if (!is_uncompressed) { - flush = TRUE; rw_lock = NULL; } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST) || (!no_fix_count @@ -1211,61 +1279,55 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync) /* For table residing in temporary tablespace sync is done using IO_FIX and so before scheduling for flush ensure that page is not fixed. 
*/ - flush = FALSE; + return false; } else { rw_lock = &reinterpret_cast(bpage)->lock; - if (flush_type != BUF_FLUSH_LIST) { - flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE); - } else { - /* Will SX lock later */ - flush = TRUE; - } - } - - if (flush) { - - /* We are committed to flushing by the time we get here */ - - buf_page_set_io_fix(bpage, BUF_IO_WRITE); - - buf_page_set_flush_type(bpage, flush_type); - - if (buf_pool->n_flush[flush_type] == 0) { - os_event_reset(buf_pool->no_flush[flush_type]); - } - - ++buf_pool->n_flush[flush_type]; - ut_ad(buf_pool->n_flush[flush_type] != 0); - - mutex_exit(block_mutex); - - mutex_exit(&buf_pool->mutex); - - if (flush_type == BUF_FLUSH_LIST - && is_uncompressed + if (flush_type != BUF_FLUSH_LIST && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) { - - if (!fsp_is_system_temporary(bpage->id.space())) { - /* avoiding deadlock possibility involves - doublewrite buffer, should flush it, because - it might hold the another block->lock. */ - buf_dblwr_flush_buffered_writes(); - } else { - buf_dblwr_sync_datafiles(); - } - - rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE); + return false; } - - /* Even though bpage is not protected by any mutex at this - point, it is safe to access bpage, because it is io_fixed and - oldest_modification != 0. Thus, it cannot be relocated in the - buffer pool or removed from flush_list or LRU_list. 
*/ - - buf_flush_write_block_low(bpage, flush_type, sync); } - return(flush); + /* We are committed to flushing by the time we get here */ + + buf_page_set_io_fix(bpage, BUF_IO_WRITE); + + buf_page_set_flush_type(bpage, flush_type); + + if (buf_pool->n_flush[flush_type] == 0) { + os_event_reset(buf_pool->no_flush[flush_type]); + } + + ++buf_pool->n_flush[flush_type]; + ut_ad(buf_pool->n_flush[flush_type] != 0); + + mutex_exit(block_mutex); + + mutex_exit(&buf_pool->mutex); + + if (flush_type == BUF_FLUSH_LIST + && is_uncompressed + && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) { + + if (!fsp_is_system_temporary(bpage->id.space())) { + /* avoiding deadlock possibility involves + doublewrite buffer, should flush it, because + it might hold the another block->lock. */ + buf_dblwr_flush_buffered_writes(); + } else { + buf_dblwr_sync_datafiles(); + } + + rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE); + } + + /* Even though bpage is not protected by any mutex at this + point, it is safe to access bpage, because it is io_fixed and + oldest_modification != 0. Thus, it cannot be relocated in the + buffer pool or removed from flush_list or LRU_list. 
*/ + + buf_flush_write_block_low(bpage, flush_type, sync); + return true; } # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index b946ae3ae91..8e029737441 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -3892,6 +3892,8 @@ inline void IORequest::set_fil_node(fil_node_t* node) @param[in] message message for aio handler if non-sync aio used, else ignored @param[in] ignore whether to ignore out-of-bounds page_id +@param[in] punch_hole punch the hole to the file for page_compressed + tablespace @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ dberr_t @@ -3904,7 +3906,8 @@ fil_io( ulint len, void* buf, void* message, - bool ignore) + bool ignore, + bool punch_hole) { os_offset_t offset; IORequest req_type(type); @@ -4083,13 +4086,20 @@ fil_io( || !fil_is_user_tablespace_id(page_id.space()) || offset == page_id.page_no() * zip_size); - /* Queue the aio request */ - dberr_t err = os_aio( - req_type, - mode, name, node->handle, buf, offset, len, - space->purpose != FIL_TYPE_TEMPORARY - && srv_read_only_mode, - node, message); + dberr_t err = DB_SUCCESS; + + if (punch_hole) { + /* Punch the hole to the file */ + err = os_file_punch_hole(node->handle, offset, len); + } else { + /* Queue the aio request */ + err = os_aio( + req_type, + mode, name, node->handle, buf, offset, len, + space->purpose != FIL_TYPE_TEMPORARY + && srv_read_only_mode, + node, message); + } /* We an try to recover the page from the double write buffer if the decompression fails or the page is corrupt. 
*/ @@ -4154,8 +4164,8 @@ void fil_aio_callback(os_aio_userdata_t *data) } ulint offset = bpage->id.page_no(); - if (dblwr && bpage->init_on_flush) { - bpage->init_on_flush = false; + if (dblwr && bpage->status == buf_page_t::INIT_ON_FLUSH) { + bpage->status = buf_page_t::NORMAL; dblwr = false; } dberr_t err = buf_page_io_complete(bpage, dblwr); diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 6eefabdfe52..c47a1d0a95c 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -554,8 +554,10 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr) const ulint zip_size = space->zip_size(); mtr_x_lock_space(space, mtr); + + const auto savepoint = mtr->get_savepoint(); buf_block_t* block = buf_page_create(page_id, zip_size, mtr); - buf_page_get(page_id, zip_size, RW_SX_LATCH, mtr); + mtr->sx_latch_at_savepoint(savepoint, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); space->size_in_header = size; @@ -873,16 +875,12 @@ fsp_fill_free_list( pages should be ignored. 
*/ if (i > 0) { - const page_id_t page_id(space->id, i); - - block = buf_page_create( - page_id, zip_size, mtr); - - buf_page_get( - page_id, zip_size, RW_SX_LATCH, mtr); + const auto savepoint = mtr->get_savepoint(); + block= buf_page_create(page_id_t(space->id, i), + zip_size, mtr); + mtr->sx_latch_at_savepoint(savepoint, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - fsp_init_file_page(space, block, mtr); mtr->write<2>(*block, FIL_PAGE_TYPE + block->frame, @@ -900,17 +898,11 @@ fsp_fill_free_list( ibuf_mtr.start(); ibuf_mtr.set_named_space(space); - const page_id_t page_id( - space->id, - i + FSP_IBUF_BITMAP_OFFSET); - block = buf_page_create( - page_id, zip_size, &ibuf_mtr); - - buf_page_get( - page_id, zip_size, RW_SX_LATCH, - &ibuf_mtr); - + page_id_t(space->id, + i + FSP_IBUF_BITMAP_OFFSET), + zip_size, &ibuf_mtr); + ibuf_mtr.sx_latch_at_savepoint(0, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); fsp_init_file_page(space, block, &ibuf_mtr); @@ -1060,8 +1052,7 @@ fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, @param[in,out] space tablespace @param[in] offset page number of the allocated page @param[in] rw_latch RW_SX_LATCH, RW_X_LATCH -@param[in,out] mtr mini-transaction of the allocation -@param[in,out] init_mtr mini-transaction for initializing the page +@param[in,out] mtr mini-transaction @return block, initialized */ static buf_block_t* @@ -1069,13 +1060,13 @@ fsp_page_create( fil_space_t* space, page_no_t offset, rw_lock_type_t rw_latch, - mtr_t* mtr, - mtr_t* init_mtr) + mtr_t* mtr) { buf_block_t* block = buf_page_create(page_id_t(space->id, offset), - space->zip_size(), init_mtr); + space->zip_size(), mtr); - /* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */ + /* The latch may already have been acquired, so we cannot invoke + mtr_t::x_latch_at_savepoint() or mtr_t::sx_latch_at_savepoint(). 
*/ mtr_memo_type_t memo; if (rw_latch == RW_X_LATCH) { @@ -1087,9 +1078,9 @@ fsp_page_create( memo = MTR_MEMO_PAGE_SX_FIX; } - mtr_memo_push(init_mtr, block, memo); + mtr_memo_push(mtr, block, memo); buf_block_buf_fix_inc(block, __FILE__, __LINE__); - fsp_init_file_page(space, block, init_mtr); + fsp_init_file_page(space, block, mtr); return(block); } @@ -1202,7 +1193,7 @@ fsp_alloc_free_page( } fsp_alloc_from_free_frag(block, xdes, descr, free, mtr); - return fsp_page_create(space, page_no, rw_latch, mtr, init_mtr); + return fsp_page_create(space, page_no, rw_latch, init_mtr); } /** Frees a single page of a space. @@ -2233,7 +2224,7 @@ got_hinted_page: xdes, mtr); } - return fsp_page_create(space, ret_page, rw_latch, mtr, init_mtr); + return fsp_page_create(space, ret_page, rw_latch, init_mtr); } /**********************************************************************//** @@ -2642,7 +2633,7 @@ fseg_free_page_func( fseg_free_page_low(seg_inode, iblock, space, offset, ahi, mtr); - ut_d(buf_page_set_file_page_was_freed(page_id_t(space->id, offset))); + buf_page_free(page_id_t(space->id, offset), mtr, __FILE__, __LINE__); DBUG_VOID_RETURN; } @@ -2747,13 +2738,13 @@ fseg_free_extent( fsp_free_extent(space, page, mtr); -#ifdef UNIV_DEBUG for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { - - buf_page_set_file_page_was_freed( - page_id_t(space->id, first_page_in_extent + i)); + if (!xdes_is_free(descr, i)) { + buf_page_free( + page_id_t(space->id, first_page_in_extent + i), + mtr, __FILE__, __LINE__); + } } -#endif /* UNIV_DEBUG */ } #ifndef BTR_CUR_HASH_ADAPT diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 94a5af0015c..56fe51baccc 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1948,8 +1948,6 @@ ibuf_remove_free_page(void) const page_id_t page_id(IBUF_SPACE_ID, page_no); - ut_d(buf_page_reset_file_page_was_freed(page_id)); - ibuf_enter(&mtr); mutex_enter(&ibuf_mutex); @@ -1982,7 +1980,7 @@ 
ibuf_remove_free_page(void) ibuf_bitmap_page_set_bits( bitmap_page, page_id, srv_page_size, false, &mtr); - ut_d(buf_page_set_file_page_was_freed(page_id)); + buf_page_free(page_id, &mtr, __FILE__, __LINE__); ibuf_mtr_commit(&mtr); } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 3432ef12df9..3afb6e49431 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -372,10 +372,7 @@ FILE_PAGE (the other is buf_page_get_gen). @param[in,out] mtr mini-transaction @return pointer to the block, page bufferfixed */ buf_block_t* -buf_page_create( - const page_id_t page_id, - ulint zip_size, - mtr_t* mtr); +buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr); /********************************************************************//** Releases a compressed-only page acquired with buf_page_get_zip(). */ @@ -402,24 +399,17 @@ buf_page_make_young( /*================*/ buf_page_t* bpage); /*!< in: buffer block of a file page */ -#ifdef UNIV_DEBUG -/** Sets file_page_was_freed TRUE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. -@param[in] page_id page id -@return control block if found in page hash table, otherwise NULL */ -buf_page_t* buf_page_set_file_page_was_freed(const page_id_t page_id); +/** Mark the page status as FREED for the given tablespace id and +page number. If the page is not in buffer pool then ignore it. +@param[in] page_id page_id +@param[in,out] mtr mini-transaction +@param[in] file file name +@param[in] line line where called */ +void buf_page_free(const page_id_t page_id, + mtr_t *mtr, + const char *file, + unsigned line); -/** Sets file_page_was_freed FALSE if the page is found in the buffer pool. 
-This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. -@param[in] page_id page id -@return control block if found in page hash table, otherwise NULL */ -buf_page_t* buf_page_reset_file_page_was_freed(const page_id_t page_id); - -#endif /* UNIV_DEBUG */ /********************************************************************//** Reads the freed_page_clock of a buffer block. @return freed_page_clock */ @@ -1244,13 +1234,6 @@ public: if written again we check is TRIM operation needed. */ - /** whether the page will be (re)initialized at the time it will - be written to the file, that is, whether the doublewrite buffer - can be safely skipped. Protected under similar conditions as - buf_block_t::frame. Can be set while holding buf_block_t::lock - X-latch and reset during page flush, while io_fix is in effect. */ - bool init_on_flush; - ulint real_size; /*!< Real size of the page Normal pages == srv_page_size page compressed pages, payload @@ -1365,17 +1348,25 @@ public: and bytes allocated for recv_sys.pages, the field is protected by recv_sys_t::mutex. */ -# ifdef UNIV_DEBUG - ibool file_page_was_freed; - /*!< this is set to TRUE when - fsp frees a page in buffer pool; - protected by buf_pool->zip_mutex - or buf_block_t::mutex. */ -# endif /* UNIV_DEBUG */ /** Change buffer entries for the page exist. Protected by io_fix==BUF_IO_READ or by buf_block_t::lock. */ bool ibuf_exist; + /** Block initialization status. Can be modified while holding io_fix + or buf_block_t::lock X-latch */ + enum { + /** the page was read normally and should be flushed normally */ + NORMAL = 0, + /** the page was (re)initialized, and the doublewrite buffer can be + skipped on the next flush */ + INIT_ON_FLUSH, + /** the page was freed and needs to be flushed. + For page_compressed, page flush will punch a hole to free space.
+ Else if innodb_immediate_scrub_data_uncompressed, the page will + be overwritten with zeroes. */ + FREED + } status; + void fix() { buf_fix_count++; } uint32_t unfix() { diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index dcfca4a7514..8c1cd2e9a34 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1378,6 +1378,8 @@ fil_space_extend( @param[in] message message for aio handler if non-sync aio used, else ignored @param[in] ignore whether to ignore out-of-bounds page_id +@param[in] punch_hole punch the hole to the file for page_compressed + tablespace @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ dberr_t @@ -1390,7 +1392,8 @@ fil_io( ulint len, void* buf, void* message, - bool ignore = false); + bool ignore = false, + bool punch_hole = false); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 45398f787e0..29765faa017 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -502,6 +502,8 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, @param[in,out] b buffer page */ inline void mtr_t::init(buf_block_t *b) { + b->page.status= buf_page_t::INIT_ON_FLUSH; + if (m_log_mode != MTR_LOG_ALL) { ut_ad(m_log_mode == MTR_LOG_NONE || m_log_mode == MTR_LOG_NO_REDO); @@ -510,7 +512,6 @@ inline void mtr_t::init(buf_block_t *b) m_log.close(log_write(b->page.id, &b->page)); m_last_offset= FIL_PAGE_TYPE; - b->page.init_on_flush= true; } /** Free a page. 
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 525c81af488..9e4bf5faf23 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4984,7 +4984,7 @@ loop: goto function_exit; } - ut_ad(!block->page.file_page_was_freed); + DBUG_ASSERT(block->page.status != buf_page_t::FREED); for (i = 0; i < nth_lock; i++) { @@ -5090,7 +5090,7 @@ lock_rec_block_validate( /* The lock and the block that it is referring to may be freed at this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check. If the lock exists in lock_rec_validate_page() we assert - !block->page.file_page_was_freed. */ + block->page.status != FREED. */ buf_block_t* block; mtr_t mtr; diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index fa72090d651..90a443c8b20 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -37,6 +37,7 @@ Created June 2005 by Marko Makela /** A BLOB field reference full of zero, for use in assertions and tests. Initially, BLOB field references are set to zero, in dtuple_convert_big_rec(). */ +alignas(UNIV_PAGE_SIZE_MIN) const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, }; #include "mtr0log.h" @@ -450,7 +451,7 @@ static void page_zip_compress_write_log(buf_block_t *block, if (trailer_size) mtr->zmemcpy(block->page, page_zip_get_size(page_zip) - trailer_size, trailer_size); - block->page.init_on_flush= true; /* because of mtr_t::init() */ + block->page.status = buf_page_t::INIT_ON_FLUSH; /* because of mtr_t::init() */ } /******************************************************//**