From adb93e54e8b08b44425fe3b4647debdbdc948e73 Mon Sep 17 00:00:00 2001 From: Satya B Date: Fri, 9 Oct 2009 19:43:15 +0530 Subject: [PATCH] Applying InnoDB Plugin 1.0.5 snapshot ,part 12 From r5995 to r6043 Detailed revision comments: r5995 | marko | 2009-09-28 03:52:25 -0500 (Mon, 28 Sep 2009) | 17 lines branches/zip: Do not write to PAGE_INDEX_ID after page creation, not even when restoring an uncompressed page after a compression failure. btr_page_reorganize_low(): On compression failure, do not restore those page header fields that should not be affected by the reorganization. Instead, compare the fields. page_zip_decompress(): Add the parameter ibool all, for copying all page header fields. Pass the parameter all=TRUE on block read completion, redo log application, and page_zip_validate(); pass all=FALSE in all other cases. page_zip_reorganize(): Do not restore the uncompressed page on failure. It will be restored (to pre-modification state) by the caller anyway. rb://167, Issue #346 r5996 | marko | 2009-09-28 07:46:02 -0500 (Mon, 28 Sep 2009) | 4 lines branches/zip: Address Issue #350 in comments. lock_rec_queue_validate(), lock_rec_queue_validate(): Note that this debug code may violate the latching order and cause deadlocks. r5997 | marko | 2009-09-28 08:03:58 -0500 (Mon, 28 Sep 2009) | 12 lines branches/zip: Remove an assertion failure when the InnoDB data dictionary is inconsistent with the MySQL .frm file. ha_innobase::index_read(): When the index cannot be found, return an error. ha_innobase::change_active_index(): When prebuilt->index == NULL, set also prebuilt->index_usable = FALSE. This is not needed for correctness, because prebuilt->index_usable is only checked by row_search_for_mysql(), which requires prebuilt->index != NULL. This addresses Issue #349. Approved by Heikki Tuuri over IM. r6005 | vasil | 2009-09-29 03:09:52 -0500 (Tue, 29 Sep 2009) | 4 lines branches/zip: ChangeLog: wrap around 78th column, not earlier. r6006 | vasil | 2009-09-29 05:15:25 -0500 (Tue, 29 Sep 2009) | 4 lines branches/zip: Add ChangeLog entry for the release of 1.0.4. r6007 | vasil | 2009-09-29 08:19:59 -0500 (Tue, 29 Sep 2009) | 6 lines branches/zip: Fix the year, should be 2009. Pointed by: Calvin r6026 | marko | 2009-09-30 02:18:24 -0500 (Wed, 30 Sep 2009) | 1 line branches/zip: Add some debug assertions for checking FSEG_MAGIC_N. r6028 | marko | 2009-09-30 08:55:23 -0500 (Wed, 30 Sep 2009) | 3 lines branches/zip: recv_no_log_write: New debug flag for tracking down Mantis Issue #347. No modifications should be made to the database while recv_apply_hashed_log_recs() is about to complete. r6029 | calvin | 2009-09-30 15:32:02 -0500 (Wed, 30 Sep 2009) | 4 lines branches/zip: non-functional changes Fix typo. r6031 | marko | 2009-10-01 06:24:33 -0500 (Thu, 01 Oct 2009) | 49 lines branches/zip: Clean up after a crash during DROP INDEX. When InnoDB crashes while dropping an index, ensure that the index will be completely dropped during crash recovery. row_merge_drop_index(): Before dropping an index, rename the index to start with TEMP_INDEX_PREFIX_STR and commit the change, so that row_merge_drop_temp_indexes() will drop the index after crash recovery if the server crashes while dropping the index. fseg_inode_try_get(): New function, forked from fseg_inode_get(). Return NULL if the file segment index node is free. fseg_inode_get(): Assert that the file segment index node is not free. fseg_free_step(): If the file segment index node is already free, print a diagnostic message and return TRUE. fsp_free_seg_inode(): Write a nonzero number to FSEG_MAGIC_N, so that allocated-and-freed file segment index nodes can be better distinguished from uninitialized ones. This is rb://174, addressing Issue #348. Tested by restarting mysqld upon the completion of the added log_write_up_to() invocation below, during DROP INDEX. The index was dropped after crash recovery, and re-issuing the DROP INDEX did not crash the server. Index: btr/btr0btr.c =================================================================== --- btr/btr0btr.c (revision 6026) +++ btr/btr0btr.c (working copy) @@ -42,6 +42,7 @@ Created 6/2/1994 Heikki Tuuri #include "ibuf0ibuf.h" #include "trx0trx.h" +#include "log0log.h" /* Latching strategy of the InnoDB B-tree -------------------------------------- @@ -873,6 +874,8 @@ leaf_loop: goto leaf_loop; } + + log_write_up_to(mtr.end_lsn, LOG_WAIT_ALL_GROUPS, TRUE); top_loop: mtr_start(&mtr); r6033 | calvin | 2009-10-01 15:19:46 -0500 (Thu, 01 Oct 2009) | 4 lines branches/zip: fix a typo in error message Reported as bug#47763. r6043 | inaam | 2009-10-05 09:45:35 -0500 (Mon, 05 Oct 2009) | 12 lines branches/zip rb://176 Do not invalidate buffer pool while an LRU batch is active. Added code to buf_pool_invalidate() to wait for the running batches to finish. This patch also resets the state of buf_pool struct at invalidation. This addresses the concern where buf_pool->freed_page_clock becomes non-zero because we read in a system tablespace page for file format info at startup. Approved by: Marko --- storage/innodb_plugin/ChangeLog | 30 +++++++++++ storage/innodb_plugin/btr/btr0btr.c | 24 ++++++++- storage/innodb_plugin/buf/buf0buf.c | 37 ++++++++++++- storage/innodb_plugin/fsp/fsp0fsp.c | 62 +++++++++++++++++++--- storage/innodb_plugin/handler/ha_innodb.cc | 8 ++- storage/innodb_plugin/include/log0recv.h | 5 ++ storage/innodb_plugin/include/page0page.h | 7 ++- storage/innodb_plugin/include/page0zip.h | 12 +++-- storage/innodb_plugin/include/row0ins.h | 2 +- storage/innodb_plugin/lock/lock0lock.c | 9 ++++ storage/innodb_plugin/log/log0log.c | 12 ++++- storage/innodb_plugin/log/log0recv.c | 7 +++ storage/innodb_plugin/mtr/mtr0mtr.c | 3 ++ storage/innodb_plugin/page/page0cur.c | 2 +- storage/innodb_plugin/page/page0page.c | 4 +- storage/innodb_plugin/page/page0zip.c | 51 +++++++++++++----- storage/innodb_plugin/row/row0ins.c | 2 +- storage/innodb_plugin/row/row0merge.c | 8 +++ storage/innodb_plugin/trx/trx0trx.c | 2 +- 19 files changed, 250 insertions(+), 37 deletions(-) diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 6ae7a353142..d88a3ace872 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,29 @@ +2009-10-01 The InnoDB Team + + * fsp/fsp0fsp.c, row/row0merge.c: + Clean up after a crash during DROP INDEX. When InnoDB crashes + while dropping an index, ensure that the index will be completely + dropped during crash recovery. The MySQL .frm file may still + contain the dropped index, but there is little that we can do + about it. + +2009-09-28 The InnoDB Team + + * handler/ha_innodb.cc: + When a secondary index exists in the MySQL .frm file but not in + the InnoDB data dictionary, return an error instead of letting an + assertion fail in index_read. + +2009-09-28 The InnoDB Team + + * btr/btr0btr.c, buf/buf0buf.c, include/page0page.h, + include/page0zip.h, page/page0cur.c, page/page0page.c, + page/page0zip.c: + Do not write to PAGE_INDEX_ID when restoring an uncompressed page + after a compression failure. The field should only be written + when creating a B-tree page. This fix addresses a race condition + in a debug assertion. + 2009-09-28 The InnoDB Team * fil/fil0fil.c: @@ -114,6 +140,10 @@ Fix Bug#46657 InnoDB plugin: invalid read in index_merge_innodb test (Valgrind) +2009-08-11 The InnoDB Team + + InnoDB Plugin 1.0.4 released + 2009-07-20 The InnoDB Team * handler/ha_innodb.cc, buf/buf0rea.c, include/srv0srv.h, srv/srv0srv.c diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index f34490ae101..108575798cf 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -41,6 +41,7 @@ Created 6/2/1994 Heikki Tuuri #include "lock0lock.h" #include "ibuf0ibuf.h" #include "trx0trx.h" +#include "log0log.h" /* Latching strategy of the InnoDB B-tree @@ -873,6 +874,8 @@ leaf_loop: goto leaf_loop; } + + log_write_up_to(mtr.end_lsn, LOG_WAIT_ALL_GROUPS, TRUE); top_loop: mtr_start(&mtr); @@ -1011,7 +1014,26 @@ btr_page_reorganize_low( (!page_zip_compress(page_zip, page, index, NULL))) { /* Restore the old page and exit. */ - buf_frame_copy(page, temp_page); + +#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG + /* Check that the bytes that we skip are identical. */ + ut_a(!memcmp(page, temp_page, PAGE_HEADER)); + ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page, + PAGE_HEADER + PAGE_N_RECS + temp_page, + PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS))); + ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page, + UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page, + FIL_PAGE_DATA_END)); +#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ + + memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page, + PAGE_N_RECS - PAGE_N_DIR_SLOTS); + memcpy(PAGE_DATA + page, PAGE_DATA + temp_page, + UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END); + +#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG + ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE)); +#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ goto func_exit; } diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 32435bd1f87..d87abbd0ed9 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -1834,7 +1834,7 @@ buf_zip_decompress( switch (fil_page_get_type(frame)) { case FIL_PAGE_INDEX: if (page_zip_decompress(&block->page.zip, - block->frame)) { + block->frame, TRUE)) { return(TRUE); } @@ -3287,7 +3287,32 @@ void buf_pool_invalidate(void) /*=====================*/ { - ibool freed; + ibool freed; + enum buf_flush i; + + buf_pool_mutex_enter(); + + for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { + + /* As this function is called during startup and + during redo application phase during recovery, InnoDB + is single threaded (apart from IO helper threads) at + this stage. No new write batch can be in intialization + stage at this point. */ + ut_ad(buf_pool->init_flush[i] == FALSE); + + /* However, it is possible that a write batch that has + been posted earlier is still not complete. For buffer + pool invalidation to proceed we must ensure there is NO + write activity happening. */ + if (buf_pool->n_flush[i] > 0) { + buf_pool_mutex_exit(); + buf_flush_wait_batch_end(i); + buf_pool_mutex_enter(); + } + } + + buf_pool_mutex_exit(); ut_ad(buf_all_freed()); @@ -3302,6 +3327,14 @@ buf_pool_invalidate(void) ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0); + buf_pool->freed_page_clock = 0; + buf_pool->LRU_old = NULL; + buf_pool->LRU_old_len = 0; + buf_pool->LRU_flush_ended = 0; + + memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat)); + buf_refresh_io_stats(); + buf_pool_mutex_exit(); } diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c index 9b4dca198c1..3cc4318fc06 100644 --- a/storage/innodb_plugin/fsp/fsp0fsp.c +++ b/storage/innodb_plugin/fsp/fsp0fsp.c @@ -1848,6 +1848,8 @@ fsp_seg_inode_page_find_used( if (!ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) { /* This is used */ + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) + == FSEG_MAGIC_N_VALUE); return(i); } } @@ -1879,6 +1881,9 @@ fsp_seg_inode_page_find_free( return(i); } + + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) + == FSEG_MAGIC_N_VALUE); } return(ULINT_UNDEFINED); @@ -1997,6 +2002,8 @@ fsp_alloc_seg_inode( page + FSEG_INODE_PAGE_NODE, mtr); } + ut_ad(ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID)) + || mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); return(inode); } @@ -2034,7 +2041,7 @@ fsp_free_seg_inode( } mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr); - mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr); + mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr); if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, zip_size, mtr)) { @@ -2050,11 +2057,11 @@ fsp_free_seg_inode( /**********************************************************************//** Returns the file segment inode, page x-latched. -@return segment inode, page x-latched */ +@return segment inode, page x-latched; NULL if the inode is free */ static fseg_inode_t* -fseg_inode_get( -/*===========*/ +fseg_inode_try_get( +/*===============*/ fseg_header_t* header, /*!< in: segment header */ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes @@ -2070,8 +2077,34 @@ fseg_inode_get( inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + if (UNIV_UNLIKELY + (ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID)))) { + inode = NULL; + } else { + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) + == FSEG_MAGIC_N_VALUE); + } + + return(inode); +} + +/**********************************************************************//** +Returns the file segment inode, page x-latched. +@return segment inode, page x-latched */ +static +fseg_inode_t* +fseg_inode_get( +/*===========*/ + fseg_header_t* header, /*!< in: segment header */ + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + mtr_t* mtr) /*!< in: mtr handle */ +{ + fseg_inode_t* inode + = fseg_inode_try_get(header, space, zip_size, mtr); + ut_a(inode); return(inode); } @@ -2089,6 +2122,7 @@ fseg_get_nth_frag_page_no( ut_ad(inode && mtr); ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); return(mach_read_from_4(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE)); } @@ -2107,6 +2141,7 @@ fseg_set_nth_frag_page_no( ut_ad(inode && mtr); ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, page_no, MLOG_4BYTES, mtr); @@ -2467,6 +2502,8 @@ fseg_fill_free_list( xdes_set_state(descr, XDES_FSEG, mtr); seg_id = mtr_read_dulint(inode + FSEG_ID, mtr); + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) + == FSEG_MAGIC_N_VALUE); mlog_write_dulint(descr + XDES_ID, seg_id, mtr); flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr); @@ -2495,6 +2532,7 @@ fseg_alloc_free_extent( fil_addr_t first; ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { /* Segment free list is not empty, allocate from it */ @@ -3152,6 +3190,8 @@ fseg_mark_page_used( ut_ad(seg_inode && mtr); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) + == FSEG_MAGIC_N_VALUE); descr = xdes_get_descriptor(space, zip_size, page, mtr); @@ -3389,6 +3429,8 @@ fseg_free_extent( ut_a(xdes_get_state(descr, mtr) == XDES_FSEG); ut_a(0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, mtr), mtr_read_dulint(seg_inode + FSEG_ID, mtr))); + ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) + == FSEG_MAGIC_N_VALUE); first_page_in_extent = page - (page % FSP_EXTENT_SIZE); @@ -3479,7 +3521,13 @@ fseg_free_step( ut_a(descr); ut_a(xdes_get_bit(descr, XDES_FREE_BIT, header_page % FSP_EXTENT_SIZE, mtr) == FALSE); - inode = fseg_inode_get(header, space, zip_size, mtr); + inode = fseg_inode_try_get(header, space, zip_size, mtr); + + if (UNIV_UNLIKELY(inode == NULL)) { + fprintf(stderr, "double free of inode from %u:%u\n", + (unsigned) space, (unsigned) header_page); + return(TRUE); + } descr = fseg_get_first_extent(inode, space, zip_size, mtr); @@ -3603,6 +3651,7 @@ fseg_get_first_extent( ut_ad(inode && mtr); ut_ad(space == page_get_space_id(page_align(inode))); + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); first = fil_addr_null; @@ -3817,6 +3866,7 @@ fseg_print_low( (ulong) reserved, (ulong) used, (ulong) n_full, (ulong) n_frag, (ulong) n_free, (ulong) n_not_full, (ulong) n_used); + ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); } #ifdef UNIV_BTR_PRINT diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 18d6adf7a92..8ac804ca9b6 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -3159,7 +3159,7 @@ retry: if (is_part) { sql_print_error("Failed to open table %s after " - "%lu attemtps.\n", norm_name, + "%lu attempts.\n", norm_name, retries); } @@ -5060,6 +5060,11 @@ ha_innobase::index_read( index = prebuilt->index; + if (UNIV_UNLIKELY(index == NULL)) { + prebuilt->index_usable = FALSE; + DBUG_RETURN(HA_ERR_CRASHED); + } + /* Note that if the index for which the search template is built is not necessarily prebuilt->index, but can also be the clustered index */ @@ -5219,6 +5224,7 @@ ha_innobase::change_active_index( if (UNIV_UNLIKELY(!prebuilt->index)) { sql_print_warning("InnoDB: change_active_index(%u) failed", keynr); + prebuilt->index_usable = FALSE; DBUG_RETURN(1); } diff --git a/storage/innodb_plugin/include/log0recv.h b/storage/innodb_plugin/include/log0recv.h index 8468c213bdb..6de735be945 100644 --- a/storage/innodb_plugin/include/log0recv.h +++ b/storage/innodb_plugin/include/log0recv.h @@ -433,6 +433,11 @@ are allowed yet: the variable name is misleading. */ extern ibool recv_no_ibuf_operations; /** TRUE when recv_init_crash_recovery() has been called. */ extern ibool recv_needed_recovery; +#ifdef UNIV_DEBUG +/** TRUE if writing to the redo log (mtr_commit) is forbidden. +Protected by log_sys->mutex. */ +extern ibool recv_no_log_write; +#endif /* UNIV_DEBUG */ /** TRUE if buf_page_is_corrupted() should check if the log sequence number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by diff --git a/storage/innodb_plugin/include/page0page.h b/storage/innodb_plugin/include/page0page.h index a4fe069d022..3899499fb6a 100644 --- a/storage/innodb_plugin/include/page0page.h +++ b/storage/innodb_plugin/include/page0page.h @@ -76,8 +76,11 @@ typedef byte page_header_t; header which are set in a page create */ /*----*/ #define PAGE_LEVEL 26 /* level of the node in an index tree; the - leaf level is the level 0 */ -#define PAGE_INDEX_ID 28 /* index id where the page belongs */ + leaf level is the level 0. This field should + not be written to after page creation. */ +#define PAGE_INDEX_ID 28 /* index id where the page belongs. + This field should not be written to after + page creation. */ #define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in a B-tree: defined only on the root page of a B-tree, but not in the root of an ibuf tree */ diff --git a/storage/innodb_plugin/include/page0zip.h b/storage/innodb_plugin/include/page0zip.h index 9aaa066306b..574809e5227 100644 --- a/storage/innodb_plugin/include/page0zip.h +++ b/storage/innodb_plugin/include/page0zip.h @@ -127,8 +127,12 @@ page_zip_decompress( /*================*/ page_zip_des_t* page_zip,/*!< in: data, ssize; out: m_start, m_end, m_nonempty, n_blobs */ - page_t* page) /*!< out: uncompressed page, may be trashed */ - __attribute__((nonnull)); + page_t* page, /*!< out: uncompressed page, may be trashed */ + ibool all) /*!< in: TRUE=decompress the whole page; + FALSE=verify but do not copy some + page header fields that should not change + after page creation */ + __attribute__((nonnull(1,2))); #ifdef UNIV_DEBUG /**********************************************************************//** @@ -385,8 +389,8 @@ IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a non-clustered index, the caller must update the insert buffer free bits in the same mini-transaction in such a way that the modification will be redo-logged. -@return TRUE on success, FALSE on failure; page and page_zip will be -left intact on failure. */ +@return TRUE on success, FALSE on failure; page_zip will be left +intact on failure, but page will be overwritten. */ UNIV_INTERN ibool page_zip_reorganize( diff --git a/storage/innodb_plugin/include/row0ins.h b/storage/innodb_plugin/include/row0ins.h index 530622e6225..9f93565ddb7 100644 --- a/storage/innodb_plugin/include/row0ins.h +++ b/storage/innodb_plugin/include/row0ins.h @@ -45,7 +45,7 @@ row_ins_check_foreign_constraint( /*=============================*/ ibool check_ref,/*!< in: TRUE If we want to check that the referenced table is ok, FALSE if we - want to to check the foreign key table */ + want to check the foreign key table */ dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the tables mentioned in it must be in the dictionary cache if they exist at all */ diff --git a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c index 20d444af3f4..67b2eac7219 100644 --- a/storage/innodb_plugin/lock/lock0lock.c +++ b/storage/innodb_plugin/lock/lock0lock.c @@ -4633,6 +4633,10 @@ lock_rec_queue_validate( next function call: we have to release lock table mutex to obey the latching order */ + /* If this thread is holding the file space latch + (fil_space_t::latch), the following check WILL break + latching order and may cause a deadlock of threads. */ + impl_trx = lock_sec_rec_some_has_impl_off_kernel( rec, index, offsets); @@ -4756,6 +4760,11 @@ loop: lock_mutex_exit_kernel(); + /* If this thread is holding the file space + latch (fil_space_t::latch), the following + check WILL break the latching order and may + cause a deadlock of threads. */ + lock_rec_queue_validate(block, rec, index, offsets); lock_mutex_enter_kernel(); diff --git a/storage/innodb_plugin/log/log0log.c b/storage/innodb_plugin/log/log0log.c index 85de72bb768..a23dd20772a 100644 --- a/storage/innodb_plugin/log/log0log.c +++ b/storage/innodb_plugin/log/log0log.c @@ -241,6 +241,7 @@ log_reserve_and_open( ut_a(len < log->buf_size / 2); loop: mutex_enter(&(log->mutex)); + ut_ad(!recv_no_log_write); /* Calculate an upper limit for the space the string may take in the log buffer */ @@ -309,6 +310,7 @@ log_write_low( ut_ad(mutex_own(&(log->mutex))); part_loop: + ut_ad(!recv_no_log_write); /* Calculate a part length */ data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len; @@ -377,6 +379,7 @@ log_close(void) ib_uint64_t checkpoint_age; ut_ad(mutex_own(&(log->mutex))); + ut_ad(!recv_no_log_write); lsn = log->lsn; @@ -668,8 +671,6 @@ log_calc_max_ages(void) ulint archive_margin; ulint smallest_archive_margin; - ut_ad(!mutex_own(&(log_sys->mutex))); - mutex_enter(&(log_sys->mutex)); group = UT_LIST_GET_FIRST(log_sys->log_groups); @@ -1117,6 +1118,7 @@ log_io_complete( } mutex_enter(&(log_sys->mutex)); + ut_ad(!recv_no_log_write); ut_a(group->n_pending_writes > 0); ut_a(log_sys->n_pending_writes > 0); @@ -1148,6 +1150,7 @@ log_group_file_header_flush( ulint dest_offset; ut_ad(mutex_own(&(log_sys->mutex))); + ut_ad(!recv_no_log_write); ut_a(nth_file < group->n_files); buf = *(group->file_header_bufs + nth_file); @@ -1219,6 +1222,7 @@ log_group_write_buf( ulint i; ut_ad(mutex_own(&(log_sys->mutex))); + ut_ad(!recv_no_log_write); ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0); ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); @@ -1361,6 +1365,7 @@ loop: #endif mutex_enter(&(log_sys->mutex)); + ut_ad(!recv_no_log_write); if (flush_to_disk && log_sys->flushed_to_disk_lsn >= lsn) { @@ -1974,6 +1979,7 @@ log_checkpoint( mutex_enter(&(log_sys->mutex)); + ut_ad(!recv_no_log_write); oldest_lsn = log_buf_pool_get_oldest_modification(); mutex_exit(&(log_sys->mutex)); @@ -2086,6 +2092,7 @@ loop: do_checkpoint = FALSE; mutex_enter(&(log->mutex)); + ut_ad(!recv_no_log_write); if (log->check_flush_or_checkpoint == FALSE) { mutex_exit(&(log->mutex)); @@ -3035,6 +3042,7 @@ loop: #endif /* UNIV_LOG_ARCHIVE */ mutex_enter(&(log_sys->mutex)); + ut_ad(!recv_no_log_write); if (log_sys->check_flush_or_checkpoint) { diff --git a/storage/innodb_plugin/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c index 2e4f14beccb..81dcc9cd4f8 100644 --- a/storage/innodb_plugin/log/log0recv.c +++ b/storage/innodb_plugin/log/log0recv.c @@ -78,6 +78,11 @@ UNIV_INTERN ibool recv_recovery_from_backup_on = FALSE; #ifndef UNIV_HOTBACKUP /** TRUE when recv_init_crash_recovery() has been called. */ UNIV_INTERN ibool recv_needed_recovery = FALSE; +# ifdef UNIV_DEBUG +/** TRUE if writing to the redo log (mtr_commit) is forbidden. +Protected by log_sys->mutex. */ +UNIV_INTERN ibool recv_no_log_write = FALSE; +# endif /* UNIV_DEBUG */ /** TRUE if buf_page_is_corrupted() should check if the log sequence number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by @@ -1705,6 +1710,7 @@ loop: /* Flush all the file pages to disk and invalidate them in the buffer pool */ + ut_d(recv_no_log_write = TRUE); mutex_exit(&(recv_sys->mutex)); mutex_exit(&(log_sys->mutex)); @@ -1718,6 +1724,7 @@ loop: mutex_enter(&(log_sys->mutex)); mutex_enter(&(recv_sys->mutex)); + ut_d(recv_no_log_write = FALSE); recv_no_ibuf_operations = FALSE; } diff --git a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c index 0c4bec8c82c..417e97732bb 100644 --- a/storage/innodb_plugin/mtr/mtr0mtr.c +++ b/storage/innodb_plugin/mtr/mtr0mtr.c @@ -35,6 +35,7 @@ Created 11/26/1995 Heikki Tuuri #include "log0log.h" #ifndef UNIV_HOTBACKUP +# include "log0recv.h" /*****************************************************************//** Releases the item in the slot given. */ UNIV_INLINE @@ -181,6 +182,8 @@ mtr_commit( ut_d(mtr->state = MTR_COMMITTING); #ifndef UNIV_HOTBACKUP + /* This is a dirty read, for debugging. */ + ut_ad(!recv_no_log_write); write_log = mtr->modifications && mtr->n_log_recs; if (write_log) { diff --git a/storage/innodb_plugin/page/page0cur.c b/storage/innodb_plugin/page/page0cur.c index 65f3ba67439..f10f16a7dd9 100644 --- a/storage/innodb_plugin/page/page0cur.c +++ b/storage/innodb_plugin/page/page0cur.c @@ -1195,7 +1195,7 @@ page_cur_insert_rec_zip_reorg( } /* Out of space: restore the page */ - if (!page_zip_decompress(page_zip, page)) { + if (!page_zip_decompress(page_zip, page, FALSE)) { ut_error; /* Memory corrupted? */ } ut_ad(page_validate(page, index)); diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c index b771bf4ded9..ab2ba60570e 100644 --- a/storage/innodb_plugin/page/page0page.c +++ b/storage/innodb_plugin/page/page0page.c @@ -679,7 +679,7 @@ page_copy_rec_list_end( if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, - new_page))) { + new_page, FALSE))) { ut_error; } ut_ad(page_validate(new_page, index)); @@ -792,7 +792,7 @@ page_copy_rec_list_start( if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, - new_page))) { + new_page, FALSE))) { ut_error; } ut_ad(page_validate(new_page, index)); diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c index e170adce30a..aa5e39ff04a 100644 --- a/storage/innodb_plugin/page/page0zip.c +++ b/storage/innodb_plugin/page/page0zip.c @@ -2821,7 +2821,11 @@ page_zip_decompress( /*================*/ page_zip_des_t* page_zip,/*!< in: data, ssize; out: m_start, m_end, m_nonempty, n_blobs */ - page_t* page) /*!< out: uncompressed page, may be trashed */ + page_t* page, /*!< out: uncompressed page, may be trashed */ + ibool all) /*!< in: TRUE=decompress the whole page; + FALSE=verify but do not copy some + page header fields that should not change + after page creation */ { z_stream d_stream; dict_index_t* index = NULL; @@ -2851,13 +2855,36 @@ page_zip_decompress( heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE); recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs)); + if (all) { + /* Copy the page header. */ + memcpy(page, page_zip->data, PAGE_DATA); + } else { + /* Check that the bytes that we skip are identical. */ +#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG + ut_a(!memcmp(FIL_PAGE_TYPE + page, + FIL_PAGE_TYPE + page_zip->data, + PAGE_HEADER - FIL_PAGE_TYPE)); + ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page, + PAGE_HEADER + PAGE_LEVEL + page_zip->data, + PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL))); +#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ + + /* Copy the mutable parts of the page header. */ + memcpy(page, page_zip->data, FIL_PAGE_TYPE); + memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data, + PAGE_LEVEL - PAGE_N_DIR_SLOTS); + +#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG + /* Check that the page headers match after copying. */ + ut_a(!memcmp(page, page_zip->data, PAGE_DATA)); +#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ + } + #ifdef UNIV_ZIP_DEBUG - /* Clear the page. */ - memset(page, 0x55, UNIV_PAGE_SIZE); + /* Clear the uncompressed page, except the header. */ + memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA); #endif /* UNIV_ZIP_DEBUG */ - UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE); - /* Copy the page header. */ - memcpy(page, page_zip->data, PAGE_DATA); + UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA); /* Copy the page directory. */ if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs, @@ -3098,7 +3125,7 @@ page_zip_validate_low( #endif /* UNIV_DEBUG_VALGRIND */ temp_page_zip = *page_zip; - valid = page_zip_decompress(&temp_page_zip, temp_page); + valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE); if (!valid) { fputs("page_zip_validate(): failed to decompress\n", stderr); goto func_exit; @@ -4376,8 +4403,8 @@ IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a non-clustered index, the caller must update the insert buffer free bits in the same mini-transaction in such a way that the modification will be redo-logged. -@return TRUE on success, FALSE on failure; page and page_zip will be -left intact on failure. */ +@return TRUE on success, FALSE on failure; page_zip will be left +intact on failure, but page will be overwritten. */ UNIV_INTERN ibool page_zip_reorganize( @@ -4442,9 +4469,6 @@ page_zip_reorganize( if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) { - /* Restore the old page and exit. */ - buf_frame_copy(page, temp_page); - #ifndef UNIV_HOTBACKUP buf_block_free(temp_block); #endif /* !UNIV_HOTBACKUP */ @@ -4605,7 +4629,8 @@ corrupt: memcpy(page_zip->data + page_zip_get_size(page_zip) - trailer_size, ptr + 8 + size, trailer_size); - if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) { + if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page, + TRUE))) { goto corrupt; } diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c index 930c9ec1fc7..ab3ae70e3e2 100644 --- a/storage/innodb_plugin/row/row0ins.c +++ b/storage/innodb_plugin/row/row0ins.c @@ -1191,7 +1191,7 @@ row_ins_check_foreign_constraint( /*=============================*/ ibool check_ref,/*!< in: TRUE if we want to check that the referenced table is ok, FALSE if we - want to to check the foreign key table */ + want to check the foreign key table */ dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the tables mentioned in it must be in the dictionary cache if they exist at all */ diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c index a968fe80864..a303a2f3278 100644 --- a/storage/innodb_plugin/row/row0merge.c +++ b/storage/innodb_plugin/row/row0merge.c @@ -1947,7 +1947,15 @@ row_merge_drop_index( static const char str1[] = "PROCEDURE DROP_INDEX_PROC () IS\n" "BEGIN\n" + /* Rename the index, so that it will be dropped by + row_merge_drop_temp_indexes() at crash recovery + if the server crashes before this trx is committed. */ + "UPDATE SYS_INDEXES SET NAME=CONCAT('" + TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n" + "COMMIT WORK;\n" + /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" + /* Drop the index definition and the B-tree. */ "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n" " AND TABLE_ID = :tableid;\n" "END;\n"; diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c index 1e36a2e4fe7..21ba6e481a7 100644 --- a/storage/innodb_plugin/trx/trx0trx.c +++ b/storage/innodb_plugin/trx/trx0trx.c @@ -803,7 +803,7 @@ trx_commit_off_kernel( in exactly the same order as commit lsn's, if the transactions have different rollback segments. To get exactly the same order we should hold the kernel mutex up to this point, - adding to to the contention of the kernel mutex. However, if + adding to the contention of the kernel mutex. However, if a transaction T2 is able to see modifications made by a transaction T1, T2 will always get a bigger transaction number and a bigger commit lsn than T1. */