From fecd255a16fc3be6206e03ad5e70d8e90118acc6 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 29 Apr 2011 14:49:04 +0200 Subject: [PATCH] Updated with changes from Percona Server 5.1.56-12.7, from lp:~percona-dev/percona-server/release-5.1.56-12.7 percona-server-5.1.56-12.7 as of April 29, 2011. Merged: revid:ignacio.nin@percona.com-20110427224434-e5a4kpyfwvj641q3 --- ChangeLog | 98 +++ btr/btr0btr.c | 4 +- btr/btr0cur.c | 748 ++++++------------ btr/btr0sea.c | 187 +++-- buf/buf0buddy.c | 7 +- buf/buf0buf.c | 59 +- buf/buf0flu.c | 9 +- buf/buf0lru.c | 91 ++- dict/dict0boot.c | 6 +- dict/dict0crea.c | 7 +- dict/dict0dict.c | 150 +++- dict/dict0load.c | 1 + fil/fil0fil.c | 104 ++- fsp/fsp0fsp.c | 23 +- handler/ha_innodb.cc | 349 ++++---- handler/i_s.cc | 44 +- ibuf/ibuf0ibuf.c | 8 +- include/btr0cur.h | 89 ++- include/buf0buf.h | 15 +- include/buf0buf.ic | 11 +- include/buf0lru.h | 21 +- include/buf0types.h | 2 +- include/data0data.h | 11 +- include/data0data.ic | 22 +- include/dict0boot.h | 1 + include/dict0dict.h | 7 + include/dict0mem.h | 6 + include/dict0types.h | 5 - include/fil0fil.h | 12 +- include/fsp0types.h | 2 +- include/os0file.h | 7 +- include/page0cur.h | 16 - include/rem0cmp.h | 7 +- include/rem0cmp.ic | 2 +- include/row0mysql.h | 8 + include/row0upd.h | 34 +- include/srv0srv.h | 24 +- include/sync0rw.h | 15 +- include/sync0sync.h | 13 +- include/trx0i_s.h | 2 + include/trx0rseg.h | 4 +- include/trx0trx.h | 4 +- include/univ.i | 23 +- include/ut0vec.h | 19 + include/ut0vec.ic | 29 + lock/lock0lock.c | 78 +- log/log0recv.c | 2 +- mem/mem0mem.c | 2 +- mtr/mtr0log.c | 2 +- mtr/mtr0mtr.c | 1 + os/os0file.c | 15 +- page/page0cur.c | 68 -- page/page0zip.c | 2 +- percona-suite/innodb_fix_misc_bug51325.result | 13 + percona-suite/innodb_fix_misc_bug51325.test | 13 + .../percona_show_slave_status_nolock.result | 12 +- .../percona_show_slave_status_nolock.test | 3 + ...na_slow_extended-slave_innodb_stats.result | 10 +- ...ents-and-use_global_long_query_time.result | 13 +- ...ements-and-use_global_long_query_time.test | 3 + ...cona_slow_extended-slave_statements.result | 17 +- ...ercona_slow_extended-slave_statements.test | 3 + percona-suite/userstat_bug602047.result | 16 + percona-suite/userstat_bug602047.test | 13 + plug.in | 25 +- rem/rem0cmp.c | 17 +- row/row0ins.c | 4 +- row/row0merge.c | 4 +- row/row0mysql.c | 29 +- row/row0purge.c | 68 +- row/row0row.c | 16 + row/row0umod.c | 44 +- row/row0upd.c | 386 ++++++--- row/row0vers.c | 10 +- srv/srv0srv.c | 16 +- sync/sync0arr.c | 4 +- sync/sync0rw.c | 25 +- sync/sync0sync.c | 7 +- trx/trx0i_s.c | 5 +- trx/trx0rec.c | 17 +- trx/trx0roll.c | 4 +- trx/trx0trx.c | 25 +- ut/ut0dbg.c | 2 +- 83 files changed, 1998 insertions(+), 1302 deletions(-) create mode 100644 percona-suite/innodb_fix_misc_bug51325.result create mode 100644 percona-suite/innodb_fix_misc_bug51325.test create mode 100644 percona-suite/userstat_bug602047.result create mode 100644 percona-suite/userstat_bug602047.test diff --git a/ChangeLog b/ChangeLog index 5ca60eb73d5..102db3d7824 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,102 @@ +2011-01-31 The InnoDB Team + + * btr/btr0cur.c, include/row0upd.h, + row/row0purge.c, row/row0umod.c, row/row0upd.c: + Bug#59230 assert 0 row_upd_changes_ord_field_binary() + in post-crash rollback or purge + +2011-01-27 The InnoDB Team + + * btr/btr0cur.c: + Bug#59465 btr_estimate_number_of_different_key_vals use + incorrect offset for external_size + +2011-01-27 The InnoDB Team + + * include/trx0trx.h, trx/trx0trx.c: + Bug#59440 Race condition in XA ROLLBACK and XA COMMIT + after server restart + +2011-01-25 The InnoDB Team + + * row/row0upd.c: + Bug#59585 Fix 58912 introduces compiler warning + due to potentially uninitialized variable + +2011-01-25 The InnoDB Team + + * mtr/mtr0log.c: + Bug#59486 Incorrect usage of UNIV_UNLIKELY() in mlog_parse_string() + +2011-01-25 The InnoDB Team + + * row/row0vers.c: + Fix Bug#59464 Race condition in row_vers_build_for_semi_consistent_read + +2011-01-25 The InnoDB Team + + * btr/btr0btr.c, btr/btr0cur.c, btr/btr0sea.c, + buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c, + include/buf0buf.h, include/buf0buf.ic, include/buf0lru.h, + mem/mem0mem.c, page/page0zip.c: + Fix Bug#59707 Unused compression-related parameters + in buffer pool functions + +2011-01-18 The InnoDB Team + + * include/sync0rw.h, sync/sync0arr.c, sync/sync0rw.c: + Fix Bug#59579 rw_lock_debug_print outputs to stderr, not to + SHOW ENGINE INNODB STATUS + +2011-01-14 The InnoDB Team + * btr/btr0cur.c, dict/dict0dict.c, handler/ha_innodb.cc, + include/btr0cur.h, include/dict0mem.h, include/rem0cmp.h, + include/rem0cmp.ic, include/srv0srv.h, rem/rem0cmp.c, + srv/srv0srv.c, innodb_bug30423.test: + Fix Bug#30423 InnoDBs treatment of NULL in index stats causes + bad "rows examined" estimates + +2011-01-06 The InnoDB Team + * row/row0merge.c: + Fix Bug#59312 Examine MAX_FULL_NAME_LEN in InnoDB to address + possible insufficient name buffer + +2011-01-06 The InnoDB Team + * dict/dict0dict.c, handler/ha_innodb.cc, handler/i_s.cc, + include/univ.i: + Fix Bug#58643 InnoDB: too long table name + +2011-01-06 The InnoDB Team + * handler/i_s.cc, include/trx0i_s.h, trx/trx0i_s.c: + Fix Bug#55397 cannot select from innodb_trx when trx_query contains + blobs that aren't strings + +2011-01-04 The InnoDB Team + * dict/dict0dict.c: + Fix Bug#59197 double quote in field comment prevents foreign + key constraint creation + +2010-12-21 The InnoDB Team + * include/btr0cur.h, include/row0upd.h, btr/btr0cur.c, + row/row0umod.c, row/row0upd.c: + Fix Bug#55284 Double free of off-page columns due to lock wait + while updating PRIMARY KEY + +2010-12-21 The InnoDB Team + + * include/data0data.h, include/data0data.ic, include/row0upd.h, + btr/btr0cur.c, row/row0purge.c, row/row0umod.c, row/row0upd.c, + innodb.result, innodb.test: + Fix Bug#58912 InnoDB unnecessarily avoids update-in-place + on column prefix indexes + +2010-12-09 The InnoDB Team + + * buf/buf0lru.c: + Fix Bug#57600 output of I/O sum[%lu] can go negative + 2010-11-11 The InnoDB Team + * thr/thr0loc.c, trx/trx0i_s.c: Fix Bug#57802 Empty ASSERTION parameter passed to the HASH_SEARCH macro diff --git a/btr/btr0btr.c b/btr/btr0btr.c index 4d6cd8d80fc..2fb14b06a7b 100644 --- a/btr/btr0btr.c +++ b/btr/btr0btr.c @@ -675,7 +675,7 @@ btr_page_get_father_node_ptr_func( " to fix the\n" "InnoDB: corruption. If the crash happens at " "the database startup, see\n" - "InnoDB: " REFMAN "forcing-recovery.html about\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n" "InnoDB: forcing recovery. " "Then dump + drop + reimport.\n", stderr); @@ -1009,7 +1009,7 @@ btr_page_reorganize_low( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(0); + temp_block = buf_block_alloc(); #else /* !UNIV_HOTBACKUP */ ut_ad(block == back_block1); temp_block = back_block2; diff --git a/btr/btr0cur.c b/btr/btr0cur.c index 25bdd176880..9b306ea2864 100644 --- a/btr/btr0cur.c +++ b/btr/btr0cur.c @@ -100,6 +100,18 @@ can be released by page reorganize, then it is reorganized */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ + +/** Estimated table level stats from sampled value. +@param value sampled stats +@param index index being sampled +@param sample number of sampled rows +@param ext_size external stored data size +@param not_empty table not empty +@return estimated table wide stats from sampled value */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ + (((value) * (ib_int64_t) index->stat_n_leaf_pages \ + + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) + /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -174,7 +186,7 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - rec_t* rec, /*!< in: record */ + const rec_t* rec, /*!< in: record */ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ #endif /* !UNIV_HOTBACKUP */ @@ -961,108 +973,6 @@ btr_cur_open_at_rnd_pos_func( } } -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree -after the given path -@return TRUE if the position is at the first page, and cursor must point - the first record for used by the caller.*/ -UNIV_INTERN -ibool -btr_cur_open_at_rnd_pos_after_path( -/*====================*/ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_path_t* first_rec_path, - btr_cur_t* cursor, /*!< in/out: B-tree cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t* page_cursor; - btr_path_t* slot; - ibool is_first_rec = TRUE; - ulint page_no; - ulint space; - ulint zip_size; - ulint height; - rec_t* node_ptr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - if (latch_mode == BTR_MODIFY_TREE) { - mtr_x_lock(dict_index_get_lock(index), mtr); - } else { - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - cursor->index = index; - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - page_no = dict_index_get_page(index); - - height = ULINT_UNDEFINED; - slot = first_rec_path; - - for (;;) { - buf_block_t* block; - page_t* page; - - block = buf_page_get_gen(space, zip_size, page_no, - RW_NO_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, mtr); - page = buf_block_get_frame(block); - ut_ad(0 == ut_dulint_cmp(index->id, - btr_page_get_index_id(page))); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - } - - if (height == 0) { - btr_cur_latch_leaves(page, space, zip_size, page_no, - latch_mode, cursor, mtr); - } - - if (is_first_rec && slot->nth_rec != ULINT_UNDEFINED) { - if (height == 0) { - /* must open the first rec */ - page_cur_open_on_nth_user_rec(block, page_cursor, slot->nth_rec); - } else { - is_first_rec = page_cur_open_on_rnd_user_rec_after_nth(block, - page_cursor, slot->nth_rec); - } - } else { - is_first_rec = FALSE; - page_cur_open_on_rnd_user_rec(block, page_cursor); - } - - if (height == 0) { - break; - } - - ut_ad(height > 0); - - height--; - slot++; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return (is_first_rec); -} - /*==================== B-TREE INSERT =========================*/ /*************************************************************//** @@ -1933,7 +1843,8 @@ btr_cur_update_in_place( NOT call it if index is secondary */ if (!dict_index_is_clust(index) - || row_upd_changes_ord_field_binary(NULL, index, update)) { + || row_upd_changes_ord_field_binary(index, update, thr, + NULL, NULL)) { /* Remove possible hash index pointer to this record */ btr_search_update_hash_on_delete(cursor); @@ -2685,27 +2596,24 @@ ulint btr_cur_del_mark_set_clust_rec( /*===========================*/ ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor */ + buf_block_t* block, /*!< in/out: buffer block of the record */ + rec_t* rec, /*!< in/out: record */ + dict_index_t* index, /*!< in: clustered index of the record */ + const ulint* offsets,/*!< in: rec_get_offsets(rec) */ ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { - dict_index_t* index; - buf_block_t* block; roll_ptr_t roll_ptr; ulint err; - rec_t* rec; page_zip_des_t* page_zip; trx_t* trx; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - rec = btr_cur_get_rec(cursor); - index = cursor->index; + ut_ad(dict_index_is_clust(index)); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); + ut_ad(buf_block_get_frame(block) == page_align(rec)); + ut_ad(page_is_leaf(page_align(rec))); #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { @@ -2717,13 +2625,12 @@ btr_cur_del_mark_set_clust_rec( ut_ad(dict_index_is_clust(index)); ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - err = lock_clust_rec_modify_check_and_lock(flags, - btr_cur_get_block(cursor), + err = lock_clust_rec_modify_check_and_lock(flags, block, rec, index, offsets, thr); if (err != DB_SUCCESS) { - goto func_exit; + return(err); } err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, @@ -2731,11 +2638,9 @@ btr_cur_del_mark_set_clust_rec( &roll_ptr); if (err != DB_SUCCESS) { - goto func_exit; + return(err); } - block = btr_cur_get_block(cursor); - if (block->is_hashed) { rw_lock_x_lock(&btr_search_latch); } @@ -2758,10 +2663,6 @@ btr_cur_del_mark_set_clust_rec( btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, roll_ptr, mtr); -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } return(err); } @@ -3392,149 +3293,43 @@ btr_estimate_n_rows_in_range( } /*******************************************************************//** -Estimates the number of pages which have not null value of the key of n_cols. -@return estimated number of pages */ -UNIV_INTERN -ulint -btr_estimate_n_pages_not_null( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - ulint n_cols, /*!< in: The cols should be not null */ - btr_path_t* path1) /*!< in: path1[BTR_PATH_ARRAY_N_SLOTS] */ +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /*!< in: physical record */ + ulint n_unique, /*!< in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const ulint* offsets, /*!< in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_int64_t* n_not_null) /*!< in/out: array to record number of + not null rows for n-column prefix */ { - dtuple_t* tuple1; - btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS]; - btr_cur_t cursor; - btr_path_t* slot1; - btr_path_t* slot2; - ibool diverged; - ibool diverged_lot; - ulint divergence_level; - ulint n_pages; - ulint i; - mtr_t mtr; - mem_heap_t* heap; + ulint i; - heap = mem_heap_create(n_cols * sizeof(dfield_t) - + sizeof(dtuple_t)); + ut_ad(rec_offs_n_fields(offsets) >= n_unique); - /* make tuple1 (NULL,NULL,,,) from n_cols */ - tuple1 = dtuple_create(heap, n_cols); - dict_index_copy_types(tuple1, index, n_cols); - - for (i = 0; i < n_cols; i++) { - dfield_set_null(dtuple_get_nth_field(tuple1, i)); + if (n_not_null == NULL) { + return; } - mtr_start(&mtr); + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; - cursor.path_arr = path1; + field = rec_get_nth_field(rec, offsets, i, &rec_len); - btr_cur_search_to_nth_level(index, 0, tuple1, PAGE_CUR_G, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, __FILE__, __LINE__, &mtr); - - mtr_commit(&mtr); - - - - mtr_start(&mtr); - - cursor.path_arr = path2; - - btr_cur_open_at_index_side(FALSE, index, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, &mtr); - - mtr_commit(&mtr); - - mem_heap_free(heap); - - /* We have the path information for the range in path1 and path2 */ - - n_pages = 1; - diverged = FALSE; /* This becomes true when the path is not - the same any more */ - diverged_lot = FALSE; /* This becomes true when the paths are - not the same or adjacent any more */ - divergence_level = 1000000; /* This is the level where paths diverged - a lot */ - for (i = 0; ; i++) { - ut_ad(i < BTR_PATH_ARRAY_N_SLOTS); - - slot1 = path1 + i; - slot2 = path2 + i; - - if ((slot1 + 1)->nth_rec == ULINT_UNDEFINED - || (slot2 + 1)->nth_rec == ULINT_UNDEFINED) { - - if (i > divergence_level + 1) { - /* In trees whose height is > 1 our algorithm - tends to underestimate: multiply the estimate - by 2: */ - - n_pages = n_pages * 2; - } - - /* Do not estimate the number of rows in the range - to over 1 / 2 of the estimated rows in the whole - table */ - - if (n_pages > index->stat_n_leaf_pages / 2) { - n_pages = index->stat_n_leaf_pages / 2; - - /* If there are just 0 or 1 rows in the table, - then we estimate all rows are in the range */ - - if (n_pages == 0) { - n_pages = index->stat_n_leaf_pages; - } - } - - return(n_pages); - } - - if (!diverged && slot1->nth_rec != slot2->nth_rec) { - - diverged = TRUE; - - if (slot1->nth_rec < slot2->nth_rec) { - n_pages = slot2->nth_rec - slot1->nth_rec; - - if (n_pages > 1) { - diverged_lot = TRUE; - divergence_level = i; - } - } else { - /* Maybe the tree has changed between - searches */ - - return(10); - } - - } else if (diverged && !diverged_lot) { - - if (slot1->nth_rec < slot1->n_recs - || slot2->nth_rec > 1) { - - diverged_lot = TRUE; - divergence_level = i; - - n_pages = 0; - - if (slot1->nth_rec < slot1->n_recs) { - n_pages += slot1->n_recs - - slot1->nth_rec; - } - - if (slot2->nth_rec > 1) { - n_pages += slot2->nth_rec - 1; - } - } - } else if (diverged_lot) { - - n_pages = (n_pages * (slot1->n_recs + slot2->n_recs)) - / 2; + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; } } } @@ -3542,7 +3337,10 @@ btr_estimate_n_pages_not_null( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( @@ -3556,6 +3354,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_int64_t* n_diff; + ib_int64_t* n_not_null; + ibool stats_null_not_equal; ullint n_sample_pages; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; @@ -3564,42 +3364,49 @@ btr_estimate_number_of_different_key_vals( ullint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - ulint stats_method = srv_stats_method; - btr_path_t first_rec_path[BTR_PATH_ARRAY_N_SLOTS]; - ulint effective_pages; /* effective leaf pages */ - rec_offs_init(offsets_rec_); - rec_offs_init(offsets_next_rec_); + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { - /* estimate effective pages and path for the first effective record */ - /* TODO: make it work also for n_cols > 1. */ - effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); - if (!effective_pages) { - for (j = 0; j <= n_cols; j++) { - index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages; - } - return; - } else if (effective_pages > index->stat_n_leaf_pages) { - effective_pages = index->stat_n_leaf_pages; - } - } else { - effective_pages = index->stat_n_leaf_pages; - } + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); - n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t)); + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; + + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; + + default: + ut_error; + } /* It makes no sense to test more pages than are contained in the index, thus we lower the number if it is too high */ - if (srv_stats_sample_pages > effective_pages) { - if (effective_pages > 0) { - n_sample_pages = effective_pages; + if (srv_stats_sample_pages > index->stat_index_size) { + if (index->stat_index_size > 0) { + n_sample_pages = index->stat_index_size; } else { n_sample_pages = 1; } @@ -3610,16 +3417,9 @@ btr_estimate_number_of_different_key_vals( /* We sample some pages in the index to get an estimate */ for (i = 0; i < n_sample_pages; i++) { - rec_t* supremum; - ibool is_first_page = TRUE; mtr_start(&mtr); - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { - is_first_page = btr_cur_open_at_rnd_pos_after_path(index, BTR_SEARCH_LEAF, - first_rec_path, &cursor, &mtr); - } else { btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); - } /* Count the number of different key values for each prefix of the key on this index page. If the prefix does not determine @@ -3634,25 +3434,25 @@ btr_estimate_number_of_different_key_vals( } ut_a(page); - supremum = page_get_supremum_rec(page); - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) { - /* the cursor should be the first record of the page. */ - /* Counting should be started from here. */ - rec = btr_cur_get_rec(&cursor); - } else { rec = page_rec_get_next(page_get_infimum_rec(page)); - } - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { - rec_t* next_rec; - next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + while (!page_rec_is_supremum(rec)) { + rec_t* next_rec = page_rec_get_next(rec); + if (page_rec_is_supremum(next_rec)) { + total_external_size += + btr_rec_get_externally_stored_len( + rec, offsets_rec); break; } @@ -3660,15 +3460,14 @@ btr_estimate_number_of_different_key_vals( matched_bytes = 0; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, - n_cols, &heap); + ULINT_UNDEFINED, + &heap); cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, - &matched_bytes, - (stats_method==SRV_STATS_METHOD_NULLS_NOT_EQUAL) ? - SRV_STATS_METHOD_NULLS_NOT_EQUAL : - SRV_STATS_METHOD_NULLS_EQUAL); + index, stats_null_not_equal, + &matched_fields, + &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { /* We add one if this index record has @@ -3677,6 +3476,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -3711,10 +3516,6 @@ btr_estimate_number_of_different_key_vals( } } - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - total_external_size += btr_rec_get_externally_stored_len( - rec, offsets_rec); mtr_commit(&mtr); } @@ -3728,13 +3529,9 @@ btr_estimate_number_of_different_key_vals( for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_int64_t)effective_pages - + n_sample_pages - 1 - + total_external_size - + not_empty_flag) - / (n_sample_pages - + total_external_size)); + = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, n_sample_pages, + total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * n_sample_pages + total_external_size, then @@ -3744,7 +3541,7 @@ btr_estimate_number_of_different_key_vals( different key values, or even more. Let us try to approximate that: */ - add_on = effective_pages + add_on = index->stat_n_leaf_pages / (10 * (n_sample_pages + total_external_size)); @@ -3754,24 +3551,52 @@ btr_estimate_number_of_different_key_vals( index->stat_n_diff_key_vals[j] += add_on; - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { - /* index->stat_n_diff_key_vals[k] is used for calc rec_per_key, - as "stats.records / index->stat_n_diff_key_vals[x]". - So it should be adjusted to the value which is based on whole of the index. */ - index->stat_n_diff_key_vals[j] = - index->stat_n_diff_key_vals[j] * (ib_int64_t)index->stat_n_leaf_pages - / (ib_int64_t)effective_pages; + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, n_sample_pages, + total_external_size, not_empty_flag); } } - mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ +/***********************************************************//** +Gets the offset of the pointer to the externally stored part of a field. +@return offset of the pointer to the externally stored part */ +static +ulint +btr_rec_get_field_ref_offs( +/*=======================*/ + const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ + ulint n) /*!< in: index of the external field */ +{ + ulint field_ref_offs; + ulint local_len; + + ut_a(rec_offs_nth_extern(offsets, n)); + field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len); + ut_a(local_len != UNIV_SQL_NULL); + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE); +} + +/** Gets a pointer to the externally stored part of a field. +@param rec record +@param offsets rec_get_offsets(rec) +@param n index of the externally stored field +@return pointer to the externally stored part */ +#define btr_rec_get_field_ref(rec, offsets, n) \ + ((rec) + btr_rec_get_field_ref_offs(offsets, n)) + /***********************************************************//** Gets the externally stored size of a record, in units of a database page. @return externally stored part, in units of a database page */ @@ -3779,28 +3604,27 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - rec_t* rec, /*!< in: record */ + const rec_t* rec, /*!< in: record */ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ { ulint n_fields; - byte* data; - ulint local_len; - ulint extern_len; ulint total_extern_len = 0; ulint i; ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + + if (!rec_offs_any_extern(offsets)) { + return(0); + } + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, offsets, i, &local_len); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - extern_len = mach_read_from_4(data + local_len - + BTR_EXTERN_LEN + 4); + ulint extern_len = mach_read_from_4( + btr_rec_get_field_ref(rec, offsets, i) + + BTR_EXTERN_LEN + 4); total_extern_len += ut_calc_align(extern_len, UNIV_PAGE_SIZE); @@ -3830,7 +3654,7 @@ btr_cur_set_ownership_of_extern_field( ulint byte_val; data = rec_get_nth_field(rec, offsets, i, &local_len); - + ut_ad(rec_offs_nth_extern(offsets, i)); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); local_len -= BTR_EXTERN_FIELD_REF_SIZE; @@ -3840,6 +3664,9 @@ btr_cur_set_ownership_of_extern_field( if (val) { byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); } else { +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; } @@ -3856,108 +3683,36 @@ btr_cur_set_ownership_of_extern_field( } /*******************************************************************//** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the +Marks non-updated off-page fields as disowned by this record. The ownership +must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed -to free the field. -@return TRUE if BLOB ownership was transferred */ +to free the field. */ UNIV_INTERN -ibool -btr_cur_mark_extern_inherited_fields( -/*=================================*/ +void +btr_cur_disown_inherited_fields( +/*============================*/ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed part will be updated, or NULL */ rec_t* rec, /*!< in/out: record in a clustered index */ dict_index_t* index, /*!< in: index of the page */ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ const upd_t* update, /*!< in: update vector */ - mtr_t* mtr) /*!< in: mtr, or NULL if not logged */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint n; - ulint j; ulint i; - ibool change_ownership = FALSE; - ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + ut_ad(rec_offs_any_extern(offsets)); + ut_ad(mtr); - if (!rec_offs_any_extern(offsets)) { - - return(FALSE); - } - - n = rec_offs_n_fields(offsets); - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - /* Check it is not in updated fields */ - - if (update) { - for (j = 0; j < upd_get_n_fields(update); - j++) { - if (upd_get_nth_field(update, j) - ->field_no == i) { - - goto updated; - } - } - } - + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (rec_offs_nth_extern(offsets, i) + && !upd_get_field_by_field_no(update, i)) { btr_cur_set_ownership_of_extern_field( page_zip, rec, index, offsets, i, FALSE, mtr); - - change_ownership = TRUE; -updated: - ; } } - - return(change_ownership); -} - -/*******************************************************************//** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ -UNIV_INTERN -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted to clustered index */ - const upd_t* update) /*!< in: update vector */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - dfield_t* dfield = dtuple_get_nth_field(entry, i); - byte* data; - ulint len; - ulint j; - - if (!dfield_is_ext(dfield)) { - continue; - } - - /* Check if it is in updated fields */ - - for (j = 0; j < upd_get_n_fields(update); j++) { - if (upd_get_nth_field(update, j)->field_no == i) { - - goto is_updated; - } - } - - data = dfield_get_data(dfield); - len = dfield_get_len(dfield); - data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN] - |= BTR_EXTERN_INHERITED_FLAG; - -is_updated: - ; - } } /*******************************************************************//** @@ -3995,29 +3750,6 @@ btr_cur_unmark_extern_fields( } } -/*******************************************************************//** -Marks all extern fields in a dtuple as owned by the record. */ -UNIV_INTERN -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry) /*!< in/out: clustered index entry */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - dfield_t* dfield = dtuple_get_nth_field(entry, i); - - if (dfield_is_ext(dfield)) { - byte* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN] - &= ~BTR_EXTERN_OWNER_FLAG; - } - } -} - /*******************************************************************//** Flags the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must @@ -4152,8 +3884,7 @@ btr_blob_free( && buf_block_get_space(block) == space && buf_block_get_page_no(block) == page_no) { - if (buf_LRU_free_block(&block->page, all, NULL, TRUE) - != BUF_LRU_FREED + if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED && all && block->page.zip.data /* Now, buf_LRU_free_block() may release mutex temporarily */ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE @@ -4162,7 +3893,7 @@ btr_blob_free( /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. */ - buf_LRU_free_block(&block->page, FALSE, NULL, TRUE); + buf_LRU_free_block(&block->page, FALSE, TRUE); } } @@ -4179,8 +3910,8 @@ file segment of the index tree. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint -btr_store_big_rec_extern_fields( -/*============================*/ +btr_store_big_rec_extern_fields_func( +/*=================================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -4189,11 +3920,17 @@ btr_store_big_rec_extern_fields( the "external storage" flags in offsets will not correspond to rec when this function returns */ - big_rec_t* big_rec_vec, /*!< in: vector containing fields +#ifdef UNIV_DEBUG + mtr_t* local_mtr, /*!< in: mtr containing the + latch to rec and to the tree */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ibool update_in_place,/*! in: TRUE if the record is updated + in place (not delete+insert) */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields to be stored externally */ - mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr - containing the latch to rec and to the - tree */ + { ulint rec_page_no; byte* field_ref; @@ -4211,6 +3948,7 @@ btr_store_big_rec_extern_fields( z_stream c_stream; ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(rec_offs_any_extern(offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); @@ -4242,21 +3980,37 @@ btr_store_big_rec_extern_fields( ut_a(err == Z_OK); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* All pointers to externally stored columns in the record + must either be zero or they must be pointers to inherited + columns, owned by this record or an earlier record version. */ + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (!rec_offs_nth_extern(offsets, i)) { + continue; + } + field_ref = btr_rec_get_field_ref(rec, offsets, i); + + ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + /* Either this must be an update in place, + or the BLOB must be inherited, or the BLOB pointer + must be zero (will be written in this function). */ + ut_a(update_in_place + || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) + || !memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + } +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ /* We have to create a file segment to the tablespace for each field and put the pointer to the field in rec */ for (i = 0; i < big_rec_vec->n_fields; i++) { - ut_ad(rec_offs_nth_extern(offsets, - big_rec_vec->fields[i].field_no)); - { - ulint local_len; - field_ref = rec_get_nth_field( - rec, offsets, big_rec_vec->fields[i].field_no, - &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - field_ref += local_len; - } + field_ref = btr_rec_get_field_ref( + rec, offsets, big_rec_vec->fields[i].field_no); +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* A zero BLOB pointer should have been initially inserted. */ + ut_a(!memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ extern_len = big_rec_vec->fields[i].len; UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, extern_len); @@ -4538,6 +4292,23 @@ next_zip_page: mem_heap_free(heap); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* All pointers to externally stored columns in the record + must be valid. */ + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (!rec_offs_nth_extern(offsets, i)) { + continue; + } + + field_ref = btr_rec_get_field_ref(rec, offsets, i); + + /* The pointer must not be zero. */ + ut_a(0 != memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* The column must not be disowned by this record. */ + ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + } +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ return(DB_SUCCESS); } @@ -4560,6 +4331,7 @@ btr_check_blob_fil_page_type( if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) { ulint flags = fil_space_get_flags(space_id); +#ifndef UNIV_DEBUG /* Improve debug test coverage */ if (UNIV_LIKELY ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) { /* Old versions of InnoDB did not initialize @@ -4568,6 +4340,7 @@ btr_check_blob_fil_page_type( a BLOB page that is in Antelope format.*/ return; } +#endif /* !UNIV_DEBUG */ ut_print_timestamp(stderr); fprintf(stderr, @@ -4617,23 +4390,13 @@ btr_free_externally_stored_field( ulint page_no; ulint next_page_no; mtr_t mtr; -#ifdef UNIV_DEBUG + ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains_page(local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX)); ut_ad(!rec || rec_offs_validate(rec, index, offsets)); - - if (rec) { - ulint local_len; - const byte* f = rec_get_nth_field(rec, offsets, - i, &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - f += local_len; - ut_ad(f == field_ref); - } -#endif /* UNIV_DEBUG */ + ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i)); if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { @@ -4798,13 +4561,8 @@ btr_rec_free_externally_stored_fields( for (i = 0; i < n_fields; i++) { if (rec_offs_nth_extern(offsets, i)) { - ulint len; - byte* data - = rec_get_nth_field(rec, offsets, i, &len); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - btr_free_externally_stored_field( - index, data + len - BTR_EXTERN_FIELD_REF_SIZE, + index, btr_rec_get_field_ref(rec, offsets, i), rec, offsets, page_zip, i, rb_ctx, mtr); } } diff --git a/btr/btr0sea.c b/btr/btr0sea.c index c78f791480c..3b38e2799c2 100644 --- a/btr/btr0sea.c +++ b/btr/btr0sea.c @@ -141,7 +141,7 @@ btr_search_check_free_space_in_heap(void) be enough free space in the hash table. */ if (heap->free_block == NULL) { - buf_block_t* block = buf_block_alloc(0); + buf_block_t* block = buf_block_alloc(); rw_lock_x_lock(&btr_search_latch); @@ -1186,7 +1186,7 @@ btr_search_drop_page_hash_index_on_index( /*=====================================*/ dict_index_t* index) /* in: record descriptor */ { - buf_page_t* bpage; + hash_table_t* table; buf_block_t* block; ulint n_fields; @@ -1202,96 +1202,143 @@ btr_search_drop_page_hash_index_on_index( ulint i; mem_heap_t* heap = NULL; ulint* offsets; + ibool released_search_latch; - rw_lock_x_lock(&btr_search_latch); - mutex_enter(&LRU_list_mutex); + rw_lock_s_lock(&btr_search_latch); table = btr_search_sys->hash_index; - bpage = UT_LIST_GET_LAST(buf_pool->LRU); + do { + buf_chunk_t* chunks = buf_pool->chunks; + buf_chunk_t* chunk = chunks + buf_pool->n_chunks; - while (bpage != NULL) { - block = (buf_block_t*) bpage; - if (block->index == index && block->is_hashed) { - page = block->frame; + released_search_latch = FALSE; - /* from btr_search_drop_page_hash_index() */ - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; + while (--chunk >= chunks) { + block = chunk->blocks; + i = chunk->size; - ut_a(n_fields + n_bytes > 0); - - n_recs = page_get_n_recs(page); - - /* Calculate and cache fold values into an array for fast deletion - from the hash index */ - - folds = mem_alloc(n_recs * sizeof(ulint)); - - n_cached = 0; - - rec = page_get_infimum_rec(page); - rec = page_rec_get_next_low(rec, page_is_comp(page)); - - index_id = btr_page_get_index_id(page); - - ut_a(0 == ut_dulint_cmp(index_id, index->id)); - - prev_fold = 0; - - offsets = NULL; - - while (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); - fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); - - if (fold == prev_fold && prev_fold != 0) { - - goto next_rec; +retry: + for (; i--; block++) { + if (buf_block_get_state(block) + != BUF_BLOCK_FILE_PAGE + || block->index != index + || !block->is_hashed) { + continue; } - /* Remove all hash nodes pointing to this page from the - hash chain */ + page = block->frame; - folds[n_cached] = fold; - n_cached++; -next_rec: - rec = page_rec_get_next_low(rec, page_rec_is_comp(rec)); - prev_fold = fold; - } + /* from btr_search_drop_page_hash_index() */ + n_fields = block->curr_n_fields; + n_bytes = block->curr_n_bytes; - for (i = 0; i < n_cached; i++) { - ha_remove_all_nodes_to_page(table, folds[i], page); - } + /* keeping latch order */ + rw_lock_s_unlock(&btr_search_latch); + released_search_latch = TRUE; + rw_lock_x_lock(&block->lock); - ut_a(index->search_info->ref_count > 0); - index->search_info->ref_count--; - block->is_hashed = FALSE; - block->index = NULL; + ut_a(n_fields + n_bytes > 0); + + n_recs = page_get_n_recs(page); + + /* Calculate and cache fold values into an array for fast deletion + from the hash index */ + + folds = mem_alloc(n_recs * sizeof(ulint)); + + n_cached = 0; + + rec = page_get_infimum_rec(page); + rec = page_rec_get_next_low(rec, page_is_comp(page)); + + index_id = btr_page_get_index_id(page); + ut_a(0 == ut_dulint_cmp(index_id, index->id)); + + prev_fold = 0; + + offsets = NULL; + + while (!page_rec_is_supremum(rec)) { + offsets = rec_get_offsets(rec, index, offsets, + n_fields + (n_bytes > 0), &heap); + ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); + fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); + + if (fold == prev_fold && prev_fold != 0) { + + goto next_rec; + } + + /* Remove all hash nodes pointing to this page from the + hash chain */ + + folds[n_cached] = fold; + n_cached++; +next_rec: + rec = page_rec_get_next_low(rec, page_rec_is_comp(rec)); + prev_fold = fold; + } + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_empty(heap); + } + + rw_lock_x_lock(&btr_search_latch); + + if (UNIV_UNLIKELY(!block->is_hashed)) { + goto cleanup; + } + + ut_a(block->index == index); + + if (UNIV_UNLIKELY(block->curr_n_fields != n_fields) + || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) { + rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(&block->lock); + + mem_free(folds); + + rw_lock_s_lock(&btr_search_latch); + goto retry; + } + + for (i = 0; i < n_cached; i++) { + + ha_remove_all_nodes_to_page(table, folds[i], page); + } + + ut_a(index->search_info->ref_count > 0); + index->search_info->ref_count--; + + block->is_hashed = FALSE; + block->index = NULL; + +cleanup: #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (UNIV_UNLIKELY(block->n_pointers)) { - /* Corruption */ - ut_print_timestamp(stderr); - fprintf(stderr, + if (UNIV_UNLIKELY(block->n_pointers)) { + /* Corruption */ + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Corruption of adaptive hash index. After dropping\n" "InnoDB: the hash index to a page of %s, still %lu hash nodes remain.\n", - index->name, (ulong) block->n_pointers); - } + index->name, (ulong) block->n_pointers); + } #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(&block->lock); - mem_free(folds); + mem_free(folds); + + rw_lock_s_lock(&btr_search_latch); + } } + } while (released_search_latch); - bpage = UT_LIST_GET_PREV(LRU, bpage); - } - - mutex_exit(&LRU_list_mutex); - rw_lock_x_unlock(&btr_search_latch); + rw_lock_s_unlock(&btr_search_latch); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); diff --git a/buf/buf0buddy.c b/buf/buf0buddy.c index 8ce2d1888ef..db94b4bed24 100644 --- a/buf/buf0buddy.c +++ b/buf/buf0buddy.c @@ -346,7 +346,7 @@ buf_buddy_alloc_low( if (have_page_hash_mutex) { rw_lock_x_unlock(&page_hash_latch); } - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); *lru = TRUE; //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -475,6 +475,7 @@ buf_buddy_relocate( if (size >= PAGE_ZIP_MIN_SIZE) { /* This is a compressed page. */ mutex_t* mutex; + ulint space, page_no; if (!have_page_hash_mutex) { mutex_exit(&zip_free_mutex); @@ -490,9 +491,9 @@ buf_buddy_relocate( pool), so there is nothing wrong about this. The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ - ulint space = mach_read_from_4( + space = mach_read_from_4( (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint page_no = mach_read_from_4( + page_no = mach_read_from_4( (const byte*) src + FIL_PAGE_OFFSET); /* Suppress Valgrind warnings about conditional jump on uninitialized value. */ diff --git a/buf/buf0buf.c b/buf/buf0buf.c index 175e460753b..07dc09f572b 100644 --- a/buf/buf0buf.c +++ b/buf/buf0buf.c @@ -81,9 +81,9 @@ inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) block->page.offset, DPAH_SIZE << 3); block_hash_byte = block_hash >> 3; block_hash_offset = (byte) block_hash & 0x07; - if (block_hash_byte < 0 || block_hash_byte >= DPAH_SIZE) + if (block_hash_byte >= DPAH_SIZE) fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); - if (block_hash_offset < 0 || block_hash_offset > 7) + if (block_hash_offset > 7) fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) trx->distinct_page_access++; @@ -457,7 +457,7 @@ buf_page_is_corrupted( "you may have copied the InnoDB\n" "InnoDB: tablespace but not the InnoDB " "log files. See\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: for more information.\n", (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET), @@ -754,9 +754,9 @@ buf_block_init( block->modify_clock = 0; -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ block->check_index_page_at_flush = FALSE; block->index = NULL; @@ -839,11 +839,13 @@ buf_chunk_init( ulint zip_hash_mem_size = 0; hash_table_t* zip_hash_tmp = NULL; ulint i; + ulint size_target; buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + size_target = (mem_size / UNIV_PAGE_SIZE) - 1; srv_buffer_pool_shm_is_reused = FALSE; @@ -1044,6 +1046,10 @@ init_again: chunk->size = size; } + if (chunk->size > size_target) { + chunk->size = size_target; + } + if (shm_info && !(shm_info->is_new)) { /* convert the shared memory segment for reuse */ ptrdiff_t phys_offset; @@ -1829,7 +1835,7 @@ shrink_again: buf_LRU_make_block_old(&block->page); dirty++; - } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE) + } else if (buf_LRU_free_block(&block->page, TRUE, FALSE) != BUF_LRU_FREED) { nonfree++; } @@ -2176,7 +2182,7 @@ buf_page_peek_if_search_hashed( return(is_hashed); } -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the @@ -2198,6 +2204,8 @@ buf_page_set_file_page_was_freed( bpage = buf_page_hash_get(space, offset); if (bpage) { + /* bpage->file_page_was_freed can already hold + when this code is invoked from dict_drop_index_tree() */ bpage->file_page_was_freed = TRUE; } @@ -2236,7 +2244,7 @@ buf_page_reset_file_page_was_freed( return(bpage); } -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /********************************************************************//** Get read access to a compressed page (usually of type @@ -2332,8 +2340,7 @@ err_exit: ut_a(block_mutex == &((buf_block_t*) bpage)->mutex); /* Discard the uncompressed page frame if possible. */ - if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) { mutex_exit(block_mutex); goto lookup; @@ -2357,7 +2364,7 @@ got_block: buf_page_set_accessed_make_young(bpage, access_time); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!bpage->file_page_was_freed); #endif @@ -2820,7 +2827,7 @@ wait_until_unfixed: //mutex_exit(&buf_pool_zip_mutex); mutex_exit(block_mutex); - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); ut_a(block); block_mutex = &block->mutex; @@ -2973,8 +2980,7 @@ wait_until_unfixed: /* Try to evict the block from the buffer pool, to use the insert buffer as much as possible. */ - if (buf_LRU_free_block(&block->page, TRUE, NULL) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) { buf_pool_mutex_exit(); mutex_exit(&block->mutex); fprintf(stderr, @@ -3006,7 +3012,7 @@ wait_until_unfixed: buf_page_set_accessed_make_young(&block->page, access_time); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!block->page.file_page_was_freed); #endif @@ -3182,7 +3188,7 @@ buf_page_optimistic_get( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif if (innobase_get_slow_log()) { @@ -3302,7 +3308,7 @@ buf_page_get_known_nowait( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif @@ -3393,9 +3399,9 @@ buf_page_try_get_func( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); buf_pool->stat.n_page_gets++; @@ -3425,9 +3431,9 @@ buf_page_init_low( bpage->oldest_modification = 0; HASH_INVALIDATE(bpage, hash); bpage->is_corrupt = FALSE; -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG bpage->file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /********************************************************************//** @@ -3555,7 +3561,7 @@ buf_page_init_for_read( && UNIV_LIKELY(!recv_recovery_is_on())) { block = NULL; } else { - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); ut_ad(block); } @@ -3681,6 +3687,7 @@ err_exit: bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->space = space; bpage->offset = offset; + bpage->space_was_being_deleted = FALSE; #ifdef UNIV_DEBUG bpage->in_page_hash = FALSE; @@ -3749,7 +3756,7 @@ buf_page_create( ut_ad(mtr->state == MTR_ACTIVE); ut_ad(space || !zip_size); - free_block = buf_LRU_get_free_block(0); + free_block = buf_LRU_get_free_block(); //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -3761,9 +3768,9 @@ buf_page_create( #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(space, offset) == 0); #endif -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /* Page can be found in buf_pool */ //buf_pool_mutex_exit(); @@ -3999,7 +4006,7 @@ corrupt: "InnoDB: TABLE to scan your" " table for corruption.\n" "InnoDB: See also " - REFMAN "forcing-recovery.html\n" + REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space) diff --git a/buf/buf0flu.c b/buf/buf0flu.c index 5db6f816aab..cda8d3b170e 100644 --- a/buf/buf0flu.c +++ b/buf/buf0flu.c @@ -367,7 +367,7 @@ buf_flush_ready_for_replace( if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) { - return(bpage->oldest_modification == 0 + return((bpage->oldest_modification == 0 || bpage->space_was_being_deleted) && buf_page_get_io_fix(bpage) == BUF_IO_NONE && bpage->buf_fix_count == 0); } @@ -406,6 +406,13 @@ buf_flush_ready_for_flush( && buf_page_get_io_fix(bpage) == BUF_IO_NONE) { ut_ad(bpage->in_flush_list); + if (bpage->space_was_being_deleted) { + /* should be removed from flush_list here */ + /* because buf_flush_try_neighbors() cannot flush without fil_space_get_size(space) */ + buf_flush_remove(bpage); + return(FALSE); + } + if (flush_type != BUF_FLUSH_LRU) { return(TRUE); diff --git a/buf/buf0lru.c b/buf/buf0lru.c index 1c2c3d7f04f..33db5dce193 100644 --- a/buf/buf0lru.c +++ b/buf/buf0lru.c @@ -530,6 +530,30 @@ next_page_no_mutex: } } +/******************************************************************//** +*/ +UNIV_INTERN +void +buf_LRU_mark_space_was_deleted( +/*===========================*/ + ulint id) /*!< in: space id */ +{ + buf_page_t* bpage; + + mutex_enter(&LRU_list_mutex); + + bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + + while (bpage != NULL) { + if (buf_page_get_space(bpage) == id) { + bpage->space_was_being_deleted = TRUE; + } + bpage = UT_LIST_GET_NEXT(LRU, bpage); + } + + mutex_exit(&LRU_list_mutex); +} + /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -618,7 +642,7 @@ restart: ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); - freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex); + freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex); mutex_exit(&block->mutex); switch (freed) { @@ -690,7 +714,7 @@ restart: ut_ad(bpage->in_LRU_list); accessed = buf_page_is_accessed(bpage); - freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex); + freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex); mutex_exit(block_mutex); switch (freed) { @@ -876,10 +900,8 @@ LRU list to the free list. @return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_LRU_get_free_block(void) +/*========================*/ { buf_block_t* block = NULL; ibool freed; @@ -955,28 +977,10 @@ loop: /* If there is a block in the free list, take it */ block = buf_LRU_get_free_only(); + //buf_pool_mutex_exit(); + if (block) { - -#ifdef UNIV_DEBUG - block->page.zip.m_start = -#endif /* UNIV_DEBUG */ - block->page.zip.m_end = - block->page.zip.m_nonempty = - block->page.zip.n_blobs = 0; - - if (UNIV_UNLIKELY(zip_size)) { - ibool lru; - page_zip_set_size(&block->page.zip, zip_size); - mutex_enter(&LRU_list_mutex); - block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE); - mutex_exit(&LRU_list_mutex); - UNIV_MEM_DESC(block->page.zip.data, zip_size, block); - } else { - page_zip_set_size(&block->page.zip, 0); - block->page.zip.data = NULL; - } - - //buf_pool_mutex_exit(); + memset(&block->page.zip, 0, sizeof block->page.zip); if (started_monitor) { srv_print_innodb_monitor = mon_value_was; @@ -988,8 +992,6 @@ loop: /* If no block was in the free list, search from the end of the LRU list and try to free a block there */ - //buf_pool_mutex_exit(); - freed = buf_LRU_search_and_free_block(n_iterations); if (freed > 0) { @@ -1471,10 +1473,6 @@ buf_LRU_free_block( buf_page_t* bpage, /*!< in: block to be freed */ ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released, - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ ibool have_LRU_mutex) { buf_page_t* b = NULL; @@ -1498,6 +1496,10 @@ buf_LRU_free_block( return(BUF_LRU_NOT_FREED); } + if (bpage->space_was_being_deleted && bpage->oldest_modification != 0) { + buf_flush_remove(bpage); + } + #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); #endif /* UNIV_IBUF_COUNT_DEBUG */ @@ -1685,10 +1687,6 @@ not_freed: b->io_fix = BUF_IO_READ; } - if (buf_pool_mutex_released) { - *buf_pool_mutex_released = TRUE; - } - //buf_pool_mutex_exit(); mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); @@ -2093,6 +2091,7 @@ buf_LRU_stat_update(void) /*=====================*/ { buf_LRU_stat_t* item; + buf_LRU_stat_t cur_stat; /* If we haven't started eviction yet then don't update stats. */ if (buf_pool->freed_page_clock == 0) { @@ -2107,12 +2106,19 @@ buf_LRU_stat_update(void) buf_LRU_stat_arr_ind++; buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL; - /* Add the current value and subtract the obsolete entry. */ - buf_LRU_stat_sum.io += buf_LRU_stat_cur.io - item->io; - buf_LRU_stat_sum.unzip += buf_LRU_stat_cur.unzip - item->unzip; + /* Add the current value and subtract the obsolete entry. + Since buf_LRU_stat_cur is not protected by any mutex, + it can be changing between adding to buf_LRU_stat_sum + and copying to item. Assign it to local variables to make + sure the same value assign to the buf_LRU_stat_sum + and item */ + cur_stat = buf_LRU_stat_cur; + + buf_LRU_stat_sum.io += cur_stat.io - item->io; + buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip; /* Put current entry in the array. */ - memcpy(item, &buf_LRU_stat_cur, sizeof *item); + memcpy(item, &cur_stat, sizeof *item); //buf_pool_mutex_exit(); mutex_exit(&buf_pool_mutex); @@ -2361,8 +2367,7 @@ buf_LRU_file_restore(void) continue; } - if (fil_area_is_exist(space_id, zip_size, page_no, 0, - zip_size ? zip_size : UNIV_PAGE_SIZE)) { + if (fil_is_exist(space_id, page_no)) { tablespace_version = fil_space_get_version(space_id); diff --git a/dict/dict0boot.c b/dict/dict0boot.c index 43cfced65a0..2b6a208321d 100644 --- a/dict/dict0boot.c +++ b/dict/dict0boot.c @@ -465,16 +465,20 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0); + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); table->n_mysql_handles_opened = 1; /* for pin */ dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ #if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 #error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif +#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 +#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" #endif table->id = DICT_STATS_ID; diff --git a/dict/dict0crea.c b/dict/dict0crea.c index 45765949496..99b9f266ddc 100644 --- a/dict/dict0crea.c +++ b/dict/dict0crea.c @@ -529,7 +529,7 @@ dict_create_sys_stats_tuple( sys_stats = dict_sys->sys_stats; - entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + entry = dtuple_create(heap, 4 + DATA_N_SYS_COLS); dict_table_copy_types(entry, sys_stats); @@ -548,6 +548,11 @@ dict_create_sys_stats_tuple( ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ dfield_set_data(dfield, ptr, 8); + /* 5: NON_NULL_VALS ------------------*/ + dfield = dtuple_get_nth_field(entry, 3/*NON_NULL_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); return(entry); } diff --git a/dict/dict0dict.c b/dict/dict0dict.c index 4d99907f093..18880a5c72c 100644 --- a/dict/dict0dict.c +++ b/dict/dict0dict.c @@ -937,7 +937,7 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - char old_name[MAX_TABLE_NAME_LEN + 1]; + char old_name[MAX_FULL_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -949,7 +949,7 @@ dict_table_rename_in_cache( ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: too long table name: '%s', " "max length is %d\n", table->name, - MAX_TABLE_NAME_LEN); + MAX_FULL_NAME_LEN); ut_error; } @@ -999,11 +999,11 @@ dict_table_rename_in_cache( ut_fold_string(old_name), table); if (strlen(new_name) > strlen(table->name)) { - /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid memory fragmentation, we assume a repeated calls of ut_realloc() with the same size do not cause fragmentation */ - ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); - table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN); + table->name = ut_realloc(table->name, MAX_FULL_NAME_LEN + 1); } memcpy(table->name, new_name, strlen(new_name) + 1); @@ -1732,6 +1732,12 @@ undo_size_ok: new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_int64_t)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... in case we do not calculate statistics quickly enough */ @@ -2756,7 +2762,7 @@ dict_scan_to( quote = '\0'; } else if (quote) { /* Within quotes: do nothing. */ - } else if (*ptr == '`' || *ptr == '"') { + } else if (*ptr == '`' || *ptr == '"' || *ptr == '\'') { /* Starting quote: remember the quote character. */ quote = *ptr; } else { @@ -4331,15 +4337,18 @@ dict_reload_statistics( ulint key_cols; ulint n_cols; const rec_t* rec; + ulint n_fields; const byte* field; ulint len; ib_int64_t* stat_n_diff_key_vals_tmp; + ib_int64_t* stat_n_non_null_key_vals_tmp; byte* buf; ulint i; mtr_t mtr; n_cols = dict_index_get_n_unique(index); stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + stat_n_non_null_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); sys_stats = dict_sys->sys_stats; sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); @@ -4375,9 +4384,13 @@ dict_reload_statistics( } if (rec_get_deleted_flag(rec, 0)) { + /* don't count */ + i--; goto next_rec; } + n_fields = rec_get_n_fields_old(rec); + field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); @@ -4389,6 +4402,21 @@ dict_reload_statistics( ut_a(len == 8); stat_n_diff_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); + + if (n_fields > DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_NON_NULL_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_non_null_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); + } else { + /* not enough fields: should be older */ + fprintf(stderr, "InnoDB: Notice: stats for %s/%s (%lu/%lu)" + " in SYS_STATS seems older format. " + "Please execute ANALYZE TABLE for it.\n", + index->table_name, index->name, i, n_cols); + + stat_n_non_null_key_vals_tmp[i] = ((ib_int64_t)(-1)); + } next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } @@ -4398,6 +4426,12 @@ next_rec: for (i = 0; i <= n_cols; i++) { index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + if (stat_n_non_null_key_vals_tmp[i] == ((ib_int64_t)(-1))) { + /* approximate value */ + index->stat_n_non_null_key_vals[i] = stat_n_diff_key_vals_tmp[n_cols]; + } else { + index->stat_n_non_null_key_vals[i] = stat_n_non_null_key_vals_tmp[i]; + } } } /*===========================================*/ @@ -4442,18 +4476,22 @@ dict_store_statistics( ulint n_cols; ulint rests; const rec_t* rec; + ulint n_fields; const byte* field; ulint len; ib_int64_t* stat_n_diff_key_vals_tmp; + ib_int64_t* stat_n_non_null_key_vals_tmp; byte* buf; ulint i; mtr_t mtr; n_cols = dict_index_get_n_unique(index); stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + stat_n_non_null_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); for (i = 0; i <= n_cols; i++) { stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + stat_n_non_null_key_vals_tmp[i] = index->stat_n_non_null_key_vals[i]; } sys_stats = dict_sys->sys_stats; @@ -4481,12 +4519,24 @@ dict_store_statistics( || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), index->id)) { /* not found */ - btr_pcur_close(&pcur); - mtr_commit(&mtr); + + break; } if (rec_get_deleted_flag(rec, 0)) { + /* don't count */ + i--; + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + if (n_fields <= DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + /* not update for the older smaller format */ + fprintf(stderr, "InnoDB: Notice: stats for %s/%s (%lu/%lu)" + " in SYS_STATS seems older format. Please ANALYZE TABLE it.\n", + index->table_name, index->name, i, n_cols); goto next_rec; } @@ -4503,6 +4553,14 @@ dict_store_statistics( (ulint) stat_n_diff_key_vals_tmp[key_cols] & 0xFFFFFFFF), &mtr); + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_NON_NULL_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_dulint((byte*)field, + ut_dulint_create((ulint) (stat_n_non_null_key_vals_tmp[key_cols] >> 32), + (ulint) stat_n_non_null_key_vals_tmp[key_cols] & 0xFFFFFFFF), + &mtr); + rests--; next_rec: @@ -4635,6 +4693,10 @@ dict_update_statistics( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); @@ -4662,6 +4724,78 @@ end: dict_table_stats_unlock(table, RW_X_LATCH); } +/*********************************************************************//** +*/ +UNIV_INTERN +ibool +dict_is_older_statistics( +/*=====================*/ + dict_index_t* index) +{ + mem_heap_t* heap; + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + ulint n_fields; + ulint len; + byte* buf; + mtr_t mtr; + + heap = mem_heap_create(100); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + +next_rec: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found */ + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + /* no statistics == not older statistics */ + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + if (n_fields > DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + return(FALSE); + } else { + return(TRUE); + } +} + /**********************************************************************//** Prints info of a foreign key constraint. */ static diff --git a/dict/dict0load.c b/dict/dict0load.c index 758ee8b4969..edd77e2530f 100644 --- a/dict/dict0load.c +++ b/dict/dict0load.c @@ -40,6 +40,7 @@ Created 4/24/1996 Heikki Tuuri #include "rem0cmp.h" #include "srv0start.h" #include "srv0srv.h" +#include "trx0sys.h" /****************************************************************//** Compare the name of an index column. diff --git a/fil/fil0fil.c b/fil/fil0fil.c index 52af7d45f25..fe0f591e423 100644 --- a/fil/fil0fil.c +++ b/fil/fil0fil.c @@ -242,6 +242,7 @@ the ib_logfiles form a 'space' and it is handled here */ struct fil_system_struct { #ifndef UNIV_HOTBACKUP mutex_t mutex; /*!< The mutex protecting the cache */ + mutex_t file_extend_mutex; #endif /* !UNIV_HOTBACKUP */ hash_table_t* spaces; /*!< The hash table of spaces in the system; they are hashed on the space @@ -690,7 +691,7 @@ fil_node_open_file( ut_a(space->purpose != FIL_LOG); ut_a(!trx_sys_sys_space(space->id)); - if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * (lint)UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Error: the size of single-table" " tablespace file %s\n" @@ -816,7 +817,7 @@ fil_node_close_file( ut_ad(node && system); ut_ad(mutex_own(&(system->mutex))); ut_a(node->open); - ut_a(node->n_pending == 0); + ut_a(node->n_pending == 0 || srv_lazy_drop_table); ut_a(node->n_pending_flushes == 0); ut_a(node->modification_counter == node->flush_counter); @@ -1028,7 +1029,7 @@ fil_node_free( ut_ad(node && system && space); ut_ad(mutex_own(&(system->mutex))); ut_a(node->magic_n == FIL_NODE_MAGIC_N); - ut_a(node->n_pending == 0); + ut_a(node->n_pending == 0 || srv_lazy_drop_table); if (node->open) { /* We fool the assertion in fil_node_close_file() to think @@ -1549,6 +1550,7 @@ fil_init( fil_system = mem_zalloc(sizeof(fil_system_t)); mutex_create(&fil_system->mutex, SYNC_ANY_LATCH); + mutex_create(&fil_system->file_extend_mutex, SYNC_OUTER_ANY_LATCH); fil_system->spaces = hash_create(hash_size); fil_system->name_hash = hash_create(hash_size); @@ -2295,7 +2297,11 @@ try_again: completely and permanently. The flag is_being_deleted also prevents fil_flush() from being applied to this tablespace. */ + if (srv_lazy_drop_table) { + buf_LRU_mark_space_was_deleted(id); + } else { buf_LRU_invalidate_tablespace(id); + } #endif /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ @@ -3095,8 +3101,8 @@ fil_open_single_table_tablespace( space_id = fsp_header_get_space_id(page); space_flags = fsp_header_get_flags(page); - if (srv_expand_import - && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { + if (srv_expand_import) { + ibool file_is_corrupt = FALSE; byte* buf3; byte* descr_page; @@ -3167,8 +3173,10 @@ fil_open_single_table_tablespace( if (size_bytes < free_limit_bytes) { free_limit_bytes = size_bytes; - fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath); - file_is_corrupt = TRUE; + if (size_bytes >= (ib_int64_t) (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE)) { + fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath); + file_is_corrupt = TRUE; + } } /* get cruster index information */ @@ -3314,7 +3322,7 @@ skip_info: file_is_corrupt = TRUE; descr_is_corrupt = TRUE; } else { - ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES); + descr_is_corrupt = FALSE; } @@ -3778,7 +3786,7 @@ fil_load_single_table_tablespace( size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low; #ifndef UNIV_HOTBACKUP - if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + if (size < FIL_IBD_FILE_INITIAL_SIZE * (lint)UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Error: the size of single-table tablespace" " file %s\n" @@ -3798,7 +3806,7 @@ fil_load_single_table_tablespace( /* Align the memory for file i/o if we might have O_DIRECT set */ page = ut_align(buf2, UNIV_PAGE_SIZE); - if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + if (size >= FIL_IBD_FILE_INITIAL_SIZE * (lint)UNIV_PAGE_SIZE) { success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); /* We have to read the tablespace id from the file */ @@ -4348,6 +4356,10 @@ fil_extend_space_to_desired_size( ulint page_size; ibool success = TRUE; + /* file_extend_mutex is for http://bugs.mysql.com/56433 */ + /* to protect from the other fil_extend_space_to_desired_size() */ + /* during temprary releasing &fil_system->mutex */ + mutex_enter(&fil_system->file_extend_mutex); fil_mutex_enter_and_prepare_for_io(space_id); space = fil_space_get_by_id(space_id); @@ -4359,6 +4371,7 @@ fil_extend_space_to_desired_size( *actual_size = space->size; mutex_exit(&fil_system->mutex); + mutex_exit(&fil_system->file_extend_mutex); return(TRUE); } @@ -4391,6 +4404,8 @@ fil_extend_space_to_desired_size( offset_low = ((start_page_no - file_start_page_no) % (4096 * ((1024 * 1024) / page_size))) * page_size; + + mutex_exit(&fil_system->mutex); #ifdef UNIV_HOTBACKUP success = os_file_write(node->name, node->handle, buf, offset_low, offset_high, @@ -4400,8 +4415,10 @@ fil_extend_space_to_desired_size( node->name, node->handle, buf, offset_low, offset_high, page_size * n_pages, - NULL, NULL, NULL); + NULL, NULL, space_id, NULL); #endif + mutex_enter(&fil_system->mutex); + if (success) { node->size += n_pages; space->size += n_pages; @@ -4447,6 +4464,7 @@ fil_extend_space_to_desired_size( printf("Extended %s to %lu, actual size %lu pages\n", space->name, size_after_extend, *actual_size); */ mutex_exit(&fil_system->mutex); + mutex_exit(&fil_system->file_extend_mutex); fil_flush(space_id); @@ -4811,6 +4829,22 @@ _fil_io( srv_data_written+= len; } + /* if the table space was already deleted, space might not exist already. */ + if (message + && space_id < SRV_LOG_SPACE_FIRST_ID + && ((buf_page_t*)message)->space_was_being_deleted) { + + if (mode == OS_AIO_NORMAL) { + buf_page_io_complete(message, trx); + return(DB_SUCCESS); /*fake*/ + } + if (type == OS_FILE_READ) { + return(DB_TABLESPACE_DELETED); + } else { + return(DB_SUCCESS); /*fake*/ + } + } + /* Reserve the fil_system mutex and make sure that we can open at least one file while holding it, if the file is not already open */ @@ -4940,10 +4974,24 @@ _fil_io( #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message, trx); + offset_low, offset_high, len, node, message, space_id, trx); #endif } /**/ + /* if the table space was already deleted, space might not exist already. */ + if (message + && space_id < SRV_LOG_SPACE_FIRST_ID + && ((buf_page_t*)message)->space_was_being_deleted) { + + if (mode == OS_AIO_SYNC) { + if (type == OS_FILE_READ) { + return(DB_TABLESPACE_DELETED); + } else { + return(DB_SUCCESS); /*fake*/ + } + } + } + ut_a(ret); if (mode == OS_AIO_SYNC) { @@ -4966,18 +5014,10 @@ _fil_io( Confirm whether the parameters are valid or not */ UNIV_INTERN ibool -fil_area_is_exist( +fil_is_exist( /*==============*/ ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len) /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ + ulint block_offset) /*!< in: offset in number of blocks */ { fil_space_t* space; fil_node_t* node; @@ -5051,6 +5091,7 @@ fil_aio_wait( fil_node_t* fil_node; void* message; ulint type; + ulint space_id = 0; ut_ad(fil_validate()); @@ -5058,7 +5099,7 @@ fil_aio_wait( srv_set_io_thread_op_info(segment, "native aio handle"); #ifdef WIN_ASYNC_IO ret = os_aio_windows_handle(segment, 0, &fil_node, - &message, &type); + &message, &type, &space_id); #else ret = 0; /* Eliminate compiler warning */ ut_error; @@ -5067,7 +5108,22 @@ fil_aio_wait( srv_set_io_thread_op_info(segment, "simulated aio handle"); ret = os_aio_simulated_handle(segment, &fil_node, - &message, &type); + &message, &type, &space_id); + } + + /* if the table space was already deleted, fil_node might not exist already. */ + if (message + && space_id < SRV_LOG_SPACE_FIRST_ID + && ((buf_page_t*)message)->space_was_being_deleted) { + + /* intended not to be uncompress read page */ + ut_a(buf_page_get_io_fix(message) == BUF_IO_WRITE + || !buf_page_get_zip_size(message) + || buf_page_get_state(message) != BUF_BLOCK_FILE_PAGE); + + srv_set_io_thread_op_info(segment, "complete io for buf page"); + buf_page_io_complete(message, NULL); + return; } ut_a(ret); diff --git a/fsp/fsp0fsp.c b/fsp/fsp0fsp.c index c5a28d60880..c8e4f8e269c 100644 --- a/fsp/fsp0fsp.c +++ b/fsp/fsp0fsp.c @@ -657,17 +657,18 @@ xdes_calc_descriptor_page( 0 for uncompressed pages */ ulint offset) /*!< in: page offset */ { -#ifndef DOXYGEN /* Doxygen gets confused of these */ +//#ifndef DOXYGEN /* Doxygen gets confused of these */ //# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET // + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE //# error //# endif -# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \ - + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE -# error -# endif -#endif /* !DOXYGEN */ +//# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET +// + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE +//# error +//# endif +//#endif /* !DOXYGEN */ ut_a(UNIV_PAGE_SIZE > XDES_ARR_OFFSET + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE); + ut_a(PAGE_ZIP_MIN_SIZE > XDES_ARR_OFFSET + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE); ut_ad(ut_is_2pow(zip_size)); if (!zip_size) { @@ -3359,7 +3360,7 @@ fseg_free_page_low( "InnoDB: database!\n", (ulong) page); crash: fputs("InnoDB: Please refer to\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); ut_error; } @@ -3477,9 +3478,9 @@ fseg_free_page( fseg_free_page_low(seg_inode, space, zip_size, page, mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_set_file_page_was_freed(space, page); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /**********************************************************************//** @@ -3546,13 +3547,13 @@ fseg_free_extent( fsp_free_extent(space, zip_size, page, mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG for (i = 0; i < FSP_EXTENT_SIZE; i++) { buf_page_set_file_page_was_freed(space, first_page_in_extent + i); } -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /**********************************************************************//** diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc index 519984f2f73..5722033f4b4 100644 --- a/handler/ha_innodb.cc +++ b/handler/ha_innodb.cc @@ -204,6 +204,25 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +/** Possible values for system variable "innodb_stats_method". The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/** Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -5862,6 +5881,11 @@ ha_innobase::index_read( case DB_SUCCESS: error = 0; table->status = 0; +#ifdef EXTENDED_FOR_USERSTAT + rows_read++; + if (active_index < MAX_KEY) + index_rows_read[active_index]++; +#endif break; case DB_RECORD_NOT_FOUND: error = HA_ERR_KEY_NOT_FOUND; @@ -6085,7 +6109,7 @@ ha_innobase::general_fetch( table->status = 0; #ifdef EXTENDED_FOR_USERSTAT rows_read++; - if (active_index >= 0 && active_index < MAX_KEY) + if (active_index < MAX_KEY) index_rows_read[active_index]++; #endif break; @@ -6421,6 +6445,16 @@ create_table_def( DBUG_RETURN(HA_ERR_GENERIC); } + /* MySQL does the name length check. But we do additional check + on the name length here */ + if (strlen(table_name) > MAX_FULL_NAME_LEN) { + push_warning_printf( + (THD*) trx->mysql_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_TABLE_NAME, + "InnoDB: Table Name or Database Name is too long"); + DBUG_RETURN(ER_TABLE_NAME); + } + n_cols = form->s->fields; /* We pass 0 as the space id, and determine at a lower level the space @@ -6705,10 +6739,11 @@ create_clustered_index_when_no_primary( /*****************************************************************//** Return a display name for the row format @return row format name */ - -const char *get_row_format_name( -/*============================*/ -enum row_type row_format) /*!< in: Row Format */ +UNIV_INTERN +const char* +get_row_format_name( +/*================*/ + enum row_type row_format) /*!< in: Row Format */ { switch (row_format) { case ROW_TYPE_COMPACT: @@ -6723,12 +6758,38 @@ enum row_type row_format) /*!< in: Row Format */ return("DEFAULT"); case ROW_TYPE_FIXED: return("FIXED"); - default: + case ROW_TYPE_PAGE: + case ROW_TYPE_NOT_USED: break; } return("NOT USED"); } +/** If file-per-table is missing, issue warning and set ret false */ +#define CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE \ + if (!srv_file_per_table) { \ + push_warning_printf( \ + thd, MYSQL_ERROR::WARN_LEVEL_WARN, \ + ER_ILLEGAL_HA_CREATE_OPTION, \ + "InnoDB: ROW_FORMAT=%s requires" \ + " innodb_file_per_table.", \ + get_row_format_name(row_format)); \ + ret = FALSE; \ + } + +/** If file-format is Antelope, issue warning and set ret false */ +#define CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE \ + if (srv_file_format < DICT_TF_FORMAT_ZIP) { \ + push_warning_printf( \ + thd, MYSQL_ERROR::WARN_LEVEL_WARN, \ + ER_ILLEGAL_HA_CREATE_OPTION, \ + "InnoDB: ROW_FORMAT=%s requires" \ + " innodb_file_format > Antelope.", \ + get_row_format_name(row_format)); \ + ret = FALSE; \ + } + + /*****************************************************************//** Validates the create options. We may build on this function in future. For now, it checks two specifiers: @@ -6746,7 +6807,7 @@ create_options_are_valid( { ibool kbs_specified = FALSE; ibool ret = TRUE; - enum row_type row_type = form->s->row_type; + enum row_type row_format = form->s->row_type; ut_ad(thd != NULL); @@ -6755,23 +6816,6 @@ create_options_are_valid( return(TRUE); } - /* Check for a valid Innodb ROW_FORMAT specifier. For example, - ROW_TYPE_FIXED can be sent to Innodb */ - switch (row_type) { - case ROW_TYPE_COMPACT: - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - case ROW_TYPE_REDUNDANT: - case ROW_TYPE_DEFAULT: - break; - default: - push_warning( - thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: invalid ROW_FORMAT specifier."); - ret = FALSE; - } - ut_ad(form != NULL); ut_ad(create_info != NULL); @@ -6784,7 +6828,23 @@ create_options_are_valid( case 4: case 8: case 16: - /* Valid value. */ + /* Valid KEY_BLOCK_SIZE, check its dependencies. */ + if (!srv_file_per_table) { + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_per_table."); + ret = FALSE; + } + if (srv_file_format < DICT_TF_FORMAT_ZIP) { + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_format > Antelope."); + ret = FALSE; + } break; default: push_warning_printf( @@ -6794,72 +6854,43 @@ create_options_are_valid( " Valid values are [1, 2, 4, 8, 16]", create_info->key_block_size); ret = FALSE; + break; } } - /* If KEY_BLOCK_SIZE was specified, check for its - dependencies. */ - if (kbs_specified && !srv_file_per_table) { - push_warning( - thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_per_table."); - ret = FALSE; - } - - if (kbs_specified && srv_file_format < DICT_TF_FORMAT_ZIP) { - push_warning( - thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE requires" - " innodb_file_format > Antelope."); - ret = FALSE; - } - - switch (row_type) { + /* Check for a valid Innodb ROW_FORMAT specifier and + other incompatibilities. */ + switch (row_format) { case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - /* These two ROW_FORMATs require srv_file_per_table - and srv_file_format > Antelope */ - if (!srv_file_per_table) { - push_warning_printf( - thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - get_row_format_name(row_type)); - ret = FALSE; - } - - if (srv_file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s requires" - " innodb_file_format > Antelope.", - get_row_format_name(row_type)); - ret = FALSE; - } - default: + CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE; + CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; break; - } - - switch (row_type) { - case ROW_TYPE_REDUNDANT: - case ROW_TYPE_COMPACT: case ROW_TYPE_DYNAMIC: - /* KEY_BLOCK_SIZE is only allowed with Compressed or Default */ + CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE; + CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; + /* fall through since dynamic also shuns KBS */ + case ROW_TYPE_COMPACT: + case ROW_TYPE_REDUNDANT: if (kbs_specified) { push_warning_printf( thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: cannot specify ROW_FORMAT = %s" " with KEY_BLOCK_SIZE.", - get_row_format_name(row_type)); - ret = FALSE; + get_row_format_name(row_format)); + ret = FALSE; } - default: + break; + case ROW_TYPE_DEFAULT: + break; + case ROW_TYPE_FIXED: + case ROW_TYPE_PAGE: + case ROW_TYPE_NOT_USED: + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, \ + "InnoDB: invalid ROW_FORMAT specifier."); + ret = FALSE; break; } @@ -6910,7 +6941,7 @@ ha_innobase::create( const ulint file_format = srv_file_format; const char* stmt; size_t stmt_len; - enum row_type row_type; + enum row_type row_format; DBUG_ENTER("ha_innobase::create"); @@ -7010,8 +7041,8 @@ ha_innobase::create( push_warning( thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_per_table."); + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_per_table."); flags = 0; } @@ -7028,20 +7059,19 @@ ha_innobase::create( push_warning_printf( thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring" - " KEY_BLOCK_SIZE=%lu.", + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu.", create_info->key_block_size); } } - row_type = form->s->row_type; + row_format = form->s->row_type; if (flags) { /* if ROW_FORMAT is set to default, automatically change it to COMPRESSED.*/ - if (row_type == ROW_TYPE_DEFAULT) { - row_type = ROW_TYPE_COMPRESSED; - } else if (row_type != ROW_TYPE_COMPRESSED) { + if (row_format == ROW_TYPE_DEFAULT) { + row_format = ROW_TYPE_COMPRESSED; + } else if (row_format != ROW_TYPE_COMPRESSED) { /* ROW_FORMAT other than COMPRESSED ignores KEY_BLOCK_SIZE. It does not make sense to reject conflicting @@ -7058,7 +7088,7 @@ ha_innobase::create( } } else { /* flags == 0 means no KEY_BLOCK_SIZE.*/ - if (row_type == ROW_TYPE_COMPRESSED) { + if (row_format == ROW_TYPE_COMPRESSED) { /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE implies half the maximum KEY_BLOCK_SIZE. */ @@ -7073,7 +7103,7 @@ ha_innobase::create( } } - switch (row_type) { + switch (row_format) { case ROW_TYPE_REDUNDANT: break; case ROW_TYPE_COMPRESSED: @@ -7084,25 +7114,25 @@ ha_innobase::create( ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: ROW_FORMAT=%s requires" " innodb_file_per_table.", - get_row_format_name(row_type)); + get_row_format_name(row_format)); } else if (file_format < DICT_TF_FORMAT_ZIP) { push_warning_printf( thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: ROW_FORMAT=%s requires" " innodb_file_format > Antelope.", - get_row_format_name(row_type)); + get_row_format_name(row_format)); } else { flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } /* fall through */ case ROW_TYPE_NOT_USED: case ROW_TYPE_FIXED: - default: + case ROW_TYPE_PAGE: push_warning( thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, @@ -7220,23 +7250,25 @@ ha_innobase::create( setup at this stage and so we use thd. */ /* We need to copy the AUTOINC value from the old table if - this is an ALTER TABLE or CREATE INDEX because CREATE INDEX - does a table copy too. */ + this is an ALTER|OPTIMIZE TABLE or CREATE INDEX because CREATE INDEX + does a table copy too. If query was one of : + + CREATE TABLE ...AUTO_INCREMENT = x; or + ALTER TABLE...AUTO_INCREMENT = x; or + OPTIMIZE TABLE t; or + CREATE INDEX x on t(...); + + Find out a table definition from the dictionary and get + the current value of the auto increment field. Set a new + value to the auto increment field if the value is greater + than the maximum value in the column. */ if (((create_info->used_fields & HA_CREATE_USED_AUTO) || thd_sql_command(thd) == SQLCOM_ALTER_TABLE + || thd_sql_command(thd) == SQLCOM_OPTIMIZE || thd_sql_command(thd) == SQLCOM_CREATE_INDEX) && create_info->auto_increment_value > 0) { - /* Query was one of : - CREATE TABLE ...AUTO_INCREMENT = x; or - ALTER TABLE...AUTO_INCREMENT = x; or - CREATE INDEX x on t(...); - Find out a table definition from the dictionary and get - the current value of the auto increment field. Set a new - value to the auto increment field if the value is greater - than the maximum value in the column. */ - auto_inc_value = create_info->auto_increment_value; dict_table_autoinc_lock(innobase_table); @@ -7947,6 +7979,65 @@ innobase_get_mysql_key_number_for_index( return(0); } + +/*********************************************************************//** +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" +@return estimated record per key value */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + dict_index_t* index, /*!< in: dict_index_t structure */ + ulint i, /*!< in: the column we are + calculating rec per key */ + ha_rows records) /*!< in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_int64_t num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is statistics value, the value could + have slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + large than that of the distinct values, we could + consider that the table consists mostly of NULL value. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ @@ -8005,6 +8096,10 @@ ha_innobase::info_low( for (index = dict_table_get_first_index(ib_table); index != NULL; index = dict_table_get_next_index(index)) { + if (dict_is_older_statistics(index)) { + row_delete_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } row_insert_stats_for_mysql(index, prebuilt->trx); innobase_commit_low(prebuilt->trx); } @@ -8197,13 +8292,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -11341,25 +11431,6 @@ static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_sample_pages, "The number of index pages to sample when calculating statistics (default 8)", NULL, NULL, 8, 1, ~0ULL, 0); -const char *innobase_stats_method_names[]= -{ - "nulls_equal", - "nulls_unequal", - "nulls_ignored", - NullS -}; -TYPELIB innobase_stats_method_typelib= -{ - array_elements(innobase_stats_method_names) - 1, "innobase_stats_method_typelib", - innobase_stats_method_names, NULL -}; -static MYSQL_SYSVAR_ENUM(stats_method, srv_stats_method, - PLUGIN_VAR_RQCMDARG, - "Specifies how InnoDB index statistics collection code should threat NULLs. " - "Possible values of name are same to for 'myisam_stats_method'. " - "This is startup parameter.", - NULL, NULL, 0, &innobase_stats_method_typelib); - static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update, PLUGIN_VAR_RQCMDARG, "Enable/Disable InnoDB's auto update statistics of indexes. " @@ -11548,6 +11619,13 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, innodb_change_buffering_validate, innodb_change_buffering_update, "inserts"); +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -11686,6 +11764,12 @@ static MYSQL_SYSVAR_ULONG(pass_corrupt_table, srv_pass_corrupt_table, "except for the deletion.", NULL, NULL, 0, 0, 1, 0); +static MYSQL_SYSVAR_ULONG(lazy_drop_table, srv_lazy_drop_table, + PLUGIN_VAR_RQCMDARG, + "At deleting tablespace, only miminum needed processes at the time are done. " + "e.g. for http://bugs.mysql.com/51325", + NULL, NULL, 0, 0, 1, 0); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(log_block_size), @@ -11735,12 +11819,12 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(overwrite_relay_log_info), MYSQL_SYSVAR(rollback_on_timeout), MYSQL_SYSVAR(stats_on_metadata), - MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(replication_delay), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(strict_mode), @@ -11777,6 +11861,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(auto_lru_dump), MYSQL_SYSVAR(use_purge_thread), MYSQL_SYSVAR(pass_corrupt_table), + MYSQL_SYSVAR(lazy_drop_table), NULL }; diff --git a/handler/i_s.cc b/handler/i_s.cc index fa179c52f91..efb0822f14b 100644 --- a/handler/i_s.cc +++ b/handler/i_s.cc @@ -1239,8 +1239,16 @@ fill_innodb_trx_from_cache( row->trx_mysql_thread_id)); /* trx_query */ - OK(field_store_string(fields[IDX_TRX_QUERY], - row->trx_query)); + if (row->trx_query) { + /* store will do appropriate character set + conversion check */ + fields[IDX_TRX_QUERY]->store( + row->trx_query, strlen(row->trx_query), + row->trx_query_cs); + fields[IDX_TRX_QUERY]->set_notnull(); + } else { + fields[IDX_TRX_QUERY]->set_null(); + } OK(schema_table_store_record(thd, table)); } @@ -1443,16 +1451,7 @@ fill_innodb_locks_from_cache( for (i = 0; i < rows_num; i++) { i_s_locks_row_t* row; - - /* note that the decoded database or table name is - never expected to be longer than NAME_LEN; - NAME_LEN for database name - 2 for surrounding quotes around database name - NAME_LEN for table name - 2 for surrounding quotes around table name - 1 for the separating dot (.) - 9 for the #mysql50# prefix */ - char buf[2 * NAME_LEN + 14]; + char buf[MAX_FULL_NAME_LEN + 1]; const char* bufend; char lock_trx_id[TRX_ID_MAX_LEN + 1]; @@ -3281,6 +3280,14 @@ static ST_FIELD_INFO i_s_innodb_sys_stats_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + {STRUCT_FLD(field_name, "NON_NULL_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; @@ -3549,6 +3556,9 @@ copy_sys_stats_rec( { int status; int field; + ulint n_fields; + + n_fields = rec_get_n_fields_old(rec); /* INDEX_ID */ field = dict_index_get_nth_col_pos(index, 0); @@ -3568,6 +3578,16 @@ copy_sys_stats_rec( if (status) { return status; } + /* NON_NULL_VALS */ + if (n_fields < 6) { + table->field[3]->set_null(); + } else { + field = dict_index_get_nth_col_pos(index, 3); + status = copy_id_field(table, 3, rec, field); + if (status) { + return status; + } + } return 0; } diff --git a/ibuf/ibuf0ibuf.c b/ibuf/ibuf0ibuf.c index 12dbc29be23..3f741da60bb 100644 --- a/ibuf/ibuf0ibuf.c +++ b/ibuf/ibuf0ibuf.c @@ -1881,9 +1881,9 @@ ibuf_remove_free_page(void) fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, IBUF_SPACE_ID, page_no, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ ibuf_enter(); @@ -1925,9 +1925,9 @@ ibuf_remove_free_page(void) ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ mtr_commit(&mtr); mutex_exit(&ibuf_mutex); diff --git a/include/btr0cur.h b/include/btr0cur.h index 1e1dc0f580a..ece3621fa97 100644 --- a/include/btr0cur.h +++ b/include/btr0cur.h @@ -332,10 +332,14 @@ ulint btr_cur_del_mark_set_clust_rec( /*===========================*/ ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor */ + buf_block_t* block, /*!< in/out: buffer block of the record */ + rec_t* rec, /*!< in/out: record */ + dict_index_t* index, /*!< in: clustered index of the record */ + const ulint* offsets,/*!< in: rec_get_offsets(rec) */ ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + __attribute__((nonnull)); /***********************************************************//** Sets a secondary index record delete mark to TRUE or FALSE. @return DB_SUCCESS, DB_LOCK_WAIT, or error number */ @@ -474,47 +478,32 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index); /*!< in: index */ /*******************************************************************//** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the +Marks non-updated off-page fields as disowned by this record. The ownership +must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed -to free the field. -@return TRUE if BLOB ownership was transferred */ +to free the field. */ UNIV_INTERN -ibool -btr_cur_mark_extern_inherited_fields( -/*=================================*/ +void +btr_cur_disown_inherited_fields( +/*============================*/ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed part will be updated, or NULL */ rec_t* rec, /*!< in/out: record in a clustered index */ dict_index_t* index, /*!< in: index of the page */ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ const upd_t* update, /*!< in: update vector */ - mtr_t* mtr); /*!< in: mtr, or NULL if not logged */ -/*******************************************************************//** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ -UNIV_INTERN -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted to clustered index */ - const upd_t* update); /*!< in: update vector */ -/*******************************************************************//** -Marks all extern fields in a dtuple as owned by the record. */ -UNIV_INTERN -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry); /*!< in/out: clustered index entry */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + __attribute__((nonnull(2,3,4,5,6))); /*******************************************************************//** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. @@ -523,8 +512,8 @@ file segment of the index tree. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint -btr_store_big_rec_extern_fields( -/*============================*/ +btr_store_big_rec_extern_fields_func( +/*=================================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -533,10 +522,42 @@ btr_store_big_rec_extern_fields( the "external storage" flags in offsets will not correspond to rec when this function returns */ - big_rec_t* big_rec_vec, /*!< in: vector containing fields +#ifdef UNIV_DEBUG + mtr_t* local_mtr, /*!< in: mtr containing the + latch to rec and to the tree */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ibool update_in_place,/*! in: TRUE if the record is updated + in place (not delete+insert) */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields to be stored externally */ - mtr_t* local_mtr); /*!< in: mtr containing the latch to - rec and to the tree */ + __attribute__((nonnull)); + +/** Stores the fields in big_rec_vec to the tablespace and puts pointers to +them in rec. The extern flags in rec will have to be set beforehand. +The fields are stored on pages allocated from leaf node +file segment of the index tree. +@param index in: clustered index; MUST be X-latched by mtr +@param b in/out: block containing rec; MUST be X-latched by mtr +@param rec in/out: clustered index record +@param offsets in: rec_get_offsets(rec, index); + the "external storage" flags in offsets will not be adjusted +@param mtr in: mini-transaction that holds x-latch on index and b +@param upd in: TRUE if the record is updated in place (not delete+insert) +@param big in: vector containing fields to be stored externally +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +#ifdef UNIV_DEBUG +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big) +#elif defined UNIV_BLOB_LIGHT_DEBUG +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big) +#else +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big) +#endif + /*******************************************************************//** Frees the space in an externally stored field to the file space management if the field in data is owned the externally stored field, diff --git a/include/buf0buf.h b/include/buf0buf.h index e06927f42f0..bc0e9170281 100644 --- a/include/buf0buf.h +++ b/include/buf0buf.h @@ -166,10 +166,8 @@ Allocates a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ UNIV_INLINE buf_block_t* -buf_block_alloc( -/*============*/ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_block_alloc(void); +/*=================*/ /********************************************************************//** Frees a buffer block which does not contain a file page. */ UNIV_INLINE @@ -371,7 +369,7 @@ buf_reset_check_index_page_at_flush( /*================================*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: page number */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the @@ -396,7 +394,7 @@ buf_page_reset_file_page_was_freed( /*===============================*/ ulint space, /*!< in: space id */ ulint offset); /*!< in: page number */ -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /********************************************************************//** Reads the freed_page_clock of a buffer block. @return freed_page_clock */ @@ -1157,12 +1155,13 @@ struct buf_page_struct{ 0 if the block was never accessed in the buffer pool */ /* @} */ + ibool space_was_being_deleted; ibool is_corrupt; -# ifdef UNIV_DEBUG_FILE_ACCESSES +# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ibool file_page_was_freed; /*!< this is set to TRUE when fsp frees a page in buffer pool */ -# endif /* UNIV_DEBUG_FILE_ACCESSES */ +# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ }; diff --git a/include/buf0buf.ic b/include/buf0buf.ic index a725446629f..2cb0d8ef497 100644 --- a/include/buf0buf.ic +++ b/include/buf0buf.ic @@ -384,6 +384,7 @@ buf_block_set_file_page( buf_block_set_state(block, BUF_BLOCK_FILE_PAGE); block->page.space = space; block->page.offset = page_no; + block->page.space_was_being_deleted = FALSE; } /*********************************************************************//** @@ -757,14 +758,12 @@ Allocates a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ UNIV_INLINE buf_block_t* -buf_block_alloc( -/*============*/ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_block_alloc(void) +/*=================*/ { buf_block_t* block; - block = buf_LRU_get_free_block(zip_size); + block = buf_LRU_get_free_block(); buf_block_set_state(block, BUF_BLOCK_MEMORY); @@ -1075,7 +1074,7 @@ buf_page_release( buf_block_t* block, /*!< in: buffer block */ ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr __attribute__((unused))) /*!< in: mtr */ { ut_ad(block); diff --git a/include/buf0lru.h b/include/buf0lru.h index d3b59e8b579..fe7c067dfb7 100644 --- a/include/buf0lru.h +++ b/include/buf0lru.h @@ -84,6 +84,13 @@ void buf_LRU_invalidate_tablespace( /*==========================*/ ulint id); /*!< in: space id */ +/******************************************************************//** +*/ +UNIV_INTERN +void +buf_LRU_mark_space_was_deleted( +/*===========================*/ + ulint id); /*!< in: space id */ /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -112,11 +119,8 @@ buf_LRU_free_block( buf_page_t* bpage, /*!< in: block to be freed */ ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released, - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ - ibool have_LRU_mutex); + ibool have_LRU_mutex) + __attribute__((nonnull)); /******************************************************************//** Try to free a replaceable block. @return TRUE if found and freed */ @@ -147,10 +151,9 @@ LRU list to the free list. @return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_LRU_get_free_block(void) +/*========================*/ + __attribute__((warn_unused_result)); /******************************************************************//** Puts a block back to the free list. */ diff --git a/include/buf0types.h b/include/buf0types.h index 507f1543bbb..ce3e5ecc9c5 100644 --- a/include/buf0types.h +++ b/include/buf0types.h @@ -76,7 +76,7 @@ enum buf_io_fix { /** twice the maximum block size of the buddy system; the underlying memory is aligned by this amount: this must be equal to UNIV_PAGE_SIZE */ -#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES) +#define BUF_BUDDY_HIGH ((ulint)BUF_BUDDY_LOW << BUF_BUDDY_SIZES) /* @} */ #endif diff --git a/include/data0data.h b/include/data0data.h index f9fce3f3657..cab8d790ac1 100644 --- a/include/data0data.h +++ b/include/data0data.h @@ -154,14 +154,19 @@ dfield_dup( dfield_t* field, /*!< in/out: data field */ mem_heap_t* heap); /*!< in: memory heap where allocated */ /*********************************************************************//** -Tests if data length and content is equal for two dfields. -@return TRUE if equal */ +Tests if two data fields are equal. +If len==0, tests the data length and content for equality. +If len>0, tests the first len bytes of the content for equality. +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( /*==========================*/ const dfield_t* field1, /*!< in: field */ - const dfield_t* field2);/*!< in: field */ + const dfield_t* field2, /*!< in: field */ + ulint len) /*!< in: maximum prefix to compare, + or 0 to compare the whole field length */ + __attribute__((nonnull, warn_unused_result)); /*********************************************************************//** Tests if dfield data length and content is equal to the given. @return TRUE if equal */ diff --git a/include/data0data.ic b/include/data0data.ic index da79aa33702..74e0f7d09a0 100644 --- a/include/data0data.ic +++ b/include/data0data.ic @@ -229,20 +229,30 @@ dfield_dup( } /*********************************************************************//** -Tests if data length and content is equal for two dfields. -@return TRUE if equal */ +Tests if two data fields are equal. +If len==0, tests the data length and content for equality. +If len>0, tests the first len bytes of the content for equality. +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( /*==========================*/ const dfield_t* field1, /*!< in: field */ - const dfield_t* field2) /*!< in: field */ + const dfield_t* field2, /*!< in: field */ + ulint len) /*!< in: maximum prefix to compare, + or 0 to compare the whole field length */ { - ulint len; + ulint len2 = len; - len = field1->len; + if (field1->len == UNIV_SQL_NULL || len == 0 || field1->len < len) { + len = field1->len; + } - return(len == field2->len + if (field2->len == UNIV_SQL_NULL || len2 == 0 || field2->len < len2) { + len2 = field2->len; + } + + return(len == len2 && (len == UNIV_SQL_NULL || !memcmp(field1->data, field2->data, len))); } diff --git a/include/dict0boot.h b/include/dict0boot.h index 9239e031a7f..a57c5127323 100644 --- a/include/dict0boot.h +++ b/include/dict0boot.h @@ -146,6 +146,7 @@ clustered index */ #define DICT_SYS_INDEXES_NAME_FIELD 4 #define DICT_SYS_STATS_DIFF_VALS_FIELD 4 +#define DICT_SYS_STATS_NON_NULL_VALS_FIELD 5 /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is diff --git a/include/dict0dict.h b/include/dict0dict.h index 7baacdd6055..2baecdc958a 100644 --- a/include/dict0dict.h +++ b/include/dict0dict.h @@ -1062,6 +1062,13 @@ dict_update_statistics( not been initialized yet, otherwise do nothing */ ibool sync); +/*********************************************************************//** +*/ +UNIV_INTERN +ibool +dict_is_older_statistics( +/*=====================*/ + dict_index_t* index); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN diff --git a/include/dict0mem.h b/include/dict0mem.h index 6736c2a3a36..f47293bedf6 100644 --- a/include/dict0mem.h +++ b/include/dict0mem.h @@ -321,6 +321,12 @@ struct dict_index_struct{ dict_get_n_unique(index); we periodically calculate new estimates */ + ib_int64_t* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /*!< approximate index size in database pages */ diff --git a/include/dict0types.h b/include/dict0types.h index 7ad69193cc9..f14b59a19d4 100644 --- a/include/dict0types.h +++ b/include/dict0types.h @@ -33,11 +33,6 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_foreign_struct dict_foreign_t; -/* A cluster object is a table object with the type field set to -DICT_CLUSTERED */ - -typedef dict_table_t dict_cluster_t; - typedef struct ind_node_struct ind_node_t; typedef struct tab_node_struct tab_node_t; diff --git a/include/fil0fil.h b/include/fil0fil.h index a262ec8f9cc..fbf8ca20db3 100644 --- a/include/fil0fil.h +++ b/include/fil0fil.h @@ -648,18 +648,10 @@ _fil_io( Confirm whether the parameters are valid or not */ UNIV_INTERN ibool -fil_area_is_exist( +fil_is_exist( /*==============*/ ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len); /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ + ulint block_offset); /*!< in: offset in number of blocks */ /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided diff --git a/include/fsp0types.h b/include/fsp0types.h index 496081c2346..43e385b7eb0 100644 --- a/include/fsp0types.h +++ b/include/fsp0types.h @@ -42,7 +42,7 @@ fseg_alloc_free_page) */ /* @} */ /** File space extent size (one megabyte) in pages */ -#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT)) +#define FSP_EXTENT_SIZE ((ulint)1 << (20 - UNIV_PAGE_SIZE_SHIFT)) /** On a page of any file segment, data may be put starting from this offset */ diff --git a/include/os0file.h b/include/os0file.h index cbbec2cf55e..732e930517b 100644 --- a/include/os0file.h +++ b/include/os0file.h @@ -657,6 +657,7 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint space_id, trx_t* trx); /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in @@ -717,7 +718,8 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id); #endif /**********************************************************************//** @@ -739,7 +741,8 @@ os_aio_simulated_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id); /**********************************************************************//** Validates the consistency of the aio system. @return TRUE if ok */ diff --git a/include/page0cur.h b/include/page0cur.h index 6b444b3dd96..1544b0abe1c 100644 --- a/include/page0cur.h +++ b/include/page0cur.h @@ -293,22 +293,6 @@ page_cur_open_on_rnd_user_rec( /*==========================*/ buf_block_t* block, /*!< in: page */ page_cur_t* cursor);/*!< out: page cursor */ - -UNIV_INTERN -void -page_cur_open_on_nth_user_rec( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth); - -UNIV_INTERN -ibool -page_cur_open_on_rnd_user_rec_after_nth( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth); #endif /* !UNIV_HOTBACKUP */ /***********************************************************//** Parses a log record of a record insert on a page. diff --git a/include/rem0cmp.h b/include/rem0cmp.h index fcea62ad486..a908521c9f7 100644 --- a/include/rem0cmp.h +++ b/include/rem0cmp.h @@ -165,15 +165,18 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current comparison */ - ulint* matched_bytes, /*!< in/out: number of already matched + ulint* matched_bytes);/*!< in/out: number of already matched bytes within the first field not completely matched; when the function returns, contains the value for the current comparison */ - ulint stats_method); /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared. diff --git a/include/rem0cmp.ic b/include/rem0cmp.ic index d5185ec94af..63415fe7837 100644 --- a/include/rem0cmp.ic +++ b/include/rem0cmp.ic @@ -87,5 +87,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b, 0)); + FALSE, &match_f, &match_b)); } diff --git a/include/row0mysql.h b/include/row0mysql.h index 141f4beb81e..4acfd2e793b 100644 --- a/include/row0mysql.h +++ b/include/row0mysql.h @@ -384,6 +384,14 @@ row_insert_stats_for_mysql( dict_index_t* index, trx_t* trx); /*********************************************************************//** +*/ +UNIV_INTERN +int +row_delete_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. diff --git a/include/row0upd.h b/include/row0upd.h index ea14cd64213..97b7ec49a17 100644 --- a/include/row0upd.h +++ b/include/row0upd.h @@ -280,16 +280,29 @@ NOTE: we compare the fields as binary strings! @return TRUE if update vector changes an ordering field in the index record */ UNIV_INTERN ibool -row_upd_changes_ord_field_binary( -/*=============================*/ +row_upd_changes_ord_field_binary_func( +/*==================================*/ + dict_index_t* index, /*!< in: index of the record */ + const upd_t* update, /*!< in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ const dtuple_t* row, /*!< in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at compile time */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update);/*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ + const row_ext_t*ext) /*!< NULL, or prefixes of the externally + stored columns in the old row */ + __attribute__((nonnull(1,2), warn_unused_result)); +#ifdef UNIV_DEBUG +# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ + row_upd_changes_ord_field_binary_func(index,update,thr,row,ext) +#else /* UNIV_DEBUG */ +# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ + row_upd_changes_ord_field_binary_func(index,update,row,ext) +#endif /* UNIV_DEBUG */ /***********************************************************//** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering @@ -462,11 +475,16 @@ struct upd_node_struct{ #define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be inserted, old record is already delete marked */ -#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered +#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be + inserted, old record is already + delete-marked; non-updated BLOBs + should be inherited by the new record + and disowned by the old record */ +#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered index record was changed, or this is a delete operation: should update all the secondary index records */ -#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be +#define UPD_NODE_UPDATE_SOME_SEC 6 /* secondary index entries should be looked at and updated if an ordering field changed */ diff --git a/include/srv0srv.h b/include/srv0srv.h index 9888c95fa66..13bf0b5b9ea 100644 --- a/include/srv0srv.h +++ b/include/srv0srv.h @@ -177,6 +177,11 @@ capacity. PCT_IO(5) -> returns the number of IO operations that is 5% of the max where max is srv_io_capacity. */ #define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0))) +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -208,10 +213,6 @@ extern ulint srv_fast_shutdown; /* If this is 1, do not do a extern ibool srv_innodb_status; extern unsigned long long srv_stats_sample_pages; -extern ulint srv_stats_method; -#define SRV_STATS_METHOD_NULLS_EQUAL 0 -#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1 -#define SRV_STATS_METHOD_IGNORE_NULLS 2 extern ulint srv_stats_auto_update; extern ulint srv_stats_update_need_lock; extern ibool srv_use_sys_stats_table; @@ -242,6 +243,8 @@ extern ulint srv_pass_corrupt_table; extern ulint srv_extra_rsegments; extern ulint srv_dict_size_limit; + +extern ulint srv_lazy_drop_table; /*-------------------------------------------*/ extern ulint srv_n_rows_inserted; @@ -413,6 +416,19 @@ enum { in connection with recovery */ }; +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + #ifndef UNIV_HOTBACKUP /** Types of threads existing in the system. */ enum srv_thread_type { diff --git a/include/sync0rw.h b/include/sync0rw.h index 4edf93f4042..22de1bfdd93 100644 --- a/include/sync0rw.h +++ b/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -113,14 +113,14 @@ is necessary only if the memory block containing it is freed. */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG # define rw_lock_create(L, level) \ - rw_lock_create_func((L), (level), #L, __FILE__, __LINE__) + rw_lock_create_func((L), (level), __FILE__, __LINE__, #L) # else /* UNIV_SYNC_DEBUG */ # define rw_lock_create(L, level) \ - rw_lock_create_func((L), #L, __FILE__, __LINE__) + rw_lock_create_func((L), __FILE__, __LINE__, #L) # endif /* UNIV_SYNC_DEBUG */ #else /* UNIV_DEBUG */ # define rw_lock_create(L, level) \ - rw_lock_create_func((L), #L, NULL, 0) + rw_lock_create_func((L), #L) #endif /* UNIV_DEBUG */ /******************************************************************//** @@ -137,10 +137,10 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name); /*!< in: mutex name */ /******************************************************************//** Calling this function is obligatory only if the memory buffer containing the rw-lock is freed. Removes an rw-lock object from the global list. The @@ -490,6 +490,7 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info); /*!< in: debug struct */ #endif /* UNIV_SYNC_DEBUG */ diff --git a/include/sync0sync.h b/include/sync0sync.h index a500cf1da45..f2ff83101ab 100644 --- a/include/sync0sync.h +++ b/include/sync0sync.h @@ -73,14 +73,14 @@ necessary only if the memory block containing it is freed. */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG # define mutex_create(M, level) \ - mutex_create_func((M), #M, (level), __FILE__, __LINE__) + mutex_create_func((M), (level), __FILE__, __LINE__, #M) # else # define mutex_create(M, level) \ - mutex_create_func((M), #M, __FILE__, __LINE__) + mutex_create_func((M), __FILE__, __LINE__, #M) # endif #else # define mutex_create(M, level) \ - mutex_create_func((M), #M, NULL, 0) + mutex_create_func((M), #M) #endif /******************************************************************//** @@ -93,14 +93,14 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ - const char* cmutex_name, /*!< in: mutex name */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name); /*!< in: mutex name */ #undef mutex_free /* Fix for MacOS X */ @@ -496,6 +496,7 @@ or row lock! */ #define SYNC_BUF_POOL 150 #define SYNC_BUF_FLUSH_LIST 149 #define SYNC_DOUBLEWRITE 140 +#define SYNC_OUTER_ANY_LATCH 136 #define SYNC_ANY_LATCH 135 #define SYNC_THR_LOCAL 133 #define SYNC_MEM_HASH 131 diff --git a/include/trx0i_s.h b/include/trx0i_s.h index 7bd4e1b88c8..48d41038ea4 100644 --- a/include/trx0i_s.h +++ b/include/trx0i_s.h @@ -110,6 +110,8 @@ struct i_s_trx_row_struct { /*!< thd_get_thread_id() */ const char* trx_query; /*!< MySQL statement being executed in the transaction */ + struct charset_info_st* trx_query_cs; /*!< charset encode the MySQL + statement */ }; /** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */ diff --git a/include/trx0rseg.h b/include/trx0rseg.h index 303188f09f2..ea9956cd143 100644 --- a/include/trx0rseg.h +++ b/include/trx0rseg.h @@ -149,9 +149,7 @@ struct trx_rseg_struct{ ulint id; /*!< rollback segment id == the index of its slot in the trx system file copy */ mutex_t mutex; /*!< mutex protecting the fields in this - struct except id; NOTE that the latching - order must always be kernel mutex -> - rseg mutex */ + struct except id, which is constant */ ulint space; /*!< space where the rollback segment is header is placed */ ulint zip_size;/* compressed page size of space diff --git a/include/trx0trx.h b/include/trx0trx.h index 4c0ce392bcd..173c63918d3 100644 --- a/include/trx0trx.h +++ b/include/trx0trx.h @@ -214,12 +214,12 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t * trx_get_trx_by_xid( /*===============*/ - XID* xid); /*!< in: X/Open XA transaction identification */ + const XID* xid); /*!< in: X/Open XA transaction identifier */ /**********************************************************************//** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. diff --git a/include/univ.i b/include/univ.i index 14c89813cad..a658c7b6eaa 100644 --- a/include/univ.i +++ b/include/univ.i @@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 14 +#define INNODB_VERSION_BUGFIX 15 #ifndef PERCONA_INNODB_VERSION #define PERCONA_INNODB_VERSION 12.5 @@ -184,14 +184,15 @@ command. Not tested on Windows. */ debugging without UNIV_DEBUG */ #define UNIV_BUF_DEBUG /* Enable buffer pool debugging without UNIV_DEBUG */ +#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column + debugging without UNIV_DEBUG */ #define UNIV_DEBUG /* Enable ut_ad() assertions and disable UNIV_INLINE */ #define UNIV_DEBUG_LOCK_VALIDATE /* Enable ut_ad(lock_rec_validate_page()) assertions. */ -#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access - (field file_page_was_freed - in buf_page_t) */ +#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access + debugging without UNIV_DEBUG */ #define UNIV_LRU_DEBUG /* debug the buffer pool LRU */ #define UNIV_HASH_DEBUG /* debug HASH_ macros */ #define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ @@ -307,6 +308,18 @@ number does not include a terminating '\0'. InnoDB probably can handle longer names internally */ #define MAX_TABLE_NAME_LEN 192 +/* The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is +the MySQL's NAME_LEN, see check_and_convert_db_name(). */ +#define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN + +/* MAX_FULL_NAME_LEN defines the full name path including the +database name and table name. In addition, 14 bytes is added for: + 2 for surrounding quotes around table name + 1 for the separating dot (.) + 9 for the #mysql50# prefix */ +#define MAX_FULL_NAME_LEN \ + (MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14) + /* UNIVERSAL TYPE DEFINITIONS ========================== @@ -423,7 +436,7 @@ it is read or written. */ /* Use sun_prefetch when compile with Sun Studio */ # define UNIV_EXPECT(expr,value) (expr) # define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr) +# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) # define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) #else /* Dummy versions of the macros */ diff --git a/include/ut0vec.h b/include/ut0vec.h index a770f671cfc..0f8b955b098 100644 --- a/include/ut0vec.h +++ b/include/ut0vec.h @@ -93,6 +93,25 @@ ib_vector_get( ib_vector_t* vec, /*!< in: vector */ ulint n); /*!< in: element index to get */ +/****************************************************************//** +Get last element. The vector must not be empty. +@return last element */ +UNIV_INLINE +void* +ib_vector_get_last( +/*===============*/ + ib_vector_t* vec); /*!< in: vector */ + +/****************************************************************//** +Set the n'th element. */ +UNIV_INLINE +void +ib_vector_set( +/*==========*/ + ib_vector_t* vec, /*!< in/out: vector */ + ulint n, /*!< in: element index to set */ + void* elem); /*!< in: data element */ + /****************************************************************//** Remove the last element from the vector. */ UNIV_INLINE diff --git a/include/ut0vec.ic b/include/ut0vec.ic index 02e881f9bca..34c858868ce 100644 --- a/include/ut0vec.ic +++ b/include/ut0vec.ic @@ -50,6 +50,35 @@ ib_vector_get( return(vec->data[n]); } +/****************************************************************//** +Get last element. The vector must not be empty. +@return last element */ +UNIV_INLINE +void* +ib_vector_get_last( +/*===============*/ + ib_vector_t* vec) /*!< in: vector */ +{ + ut_a(vec->used > 0); + + return(vec->data[vec->used - 1]); +} + +/****************************************************************//** +Set the n'th element. */ +UNIV_INLINE +void +ib_vector_set( +/*==========*/ + ib_vector_t* vec, /*!< in/out: vector */ + ulint n, /*!< in: element index to set */ + void* elem) /*!< in: data element */ +{ + ut_a(n < vec->used); + + vec->data[n] = elem; +} + /****************************************************************//** Remove the last element from the vector. @return last vector element */ diff --git a/lock/lock0lock.c b/lock/lock0lock.c index 1ded67d9147..4fcb5b2c522 100644 --- a/lock/lock0lock.c +++ b/lock/lock0lock.c @@ -3631,6 +3631,80 @@ lock_table_create( return(lock); } +/*************************************************************//** +Pops autoinc lock requests from the transaction's autoinc_locks. We +handle the case where there are gaps in the array and they need to +be popped off the stack. */ +UNIV_INLINE +void +lock_table_pop_autoinc_locks( +/*=========================*/ + trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */ +{ + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* Skip any gaps, gaps are NULL lock entries in the + trx->autoinc_locks vector. */ + + do { + ib_vector_pop(trx->autoinc_locks); + + if (ib_vector_is_empty(trx->autoinc_locks)) { + return; + } + + } while (ib_vector_get_last(trx->autoinc_locks) == NULL); +} + +/*************************************************************//** +Removes an autoinc lock request from the transaction's autoinc_locks. */ +UNIV_INLINE +void +lock_table_remove_autoinc_lock( +/*===========================*/ + lock_t* lock, /*!< in: table lock */ + trx_t* trx) /*!< in/out: transaction that owns the lock */ +{ + lock_t* autoinc_lock; + lint i = ib_vector_size(trx->autoinc_locks) - 1; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* With stored functions and procedures the user may drop + a table within the same "statement". This special case has + to be handled by deleting only those AUTOINC locks that were + held by the table being dropped. */ + + autoinc_lock = ib_vector_get(trx->autoinc_locks, i); + + /* This is the default fast case. */ + + if (autoinc_lock == lock) { + lock_table_pop_autoinc_locks(trx); + } else { + /* The last element should never be NULL */ + ut_a(autoinc_lock != NULL); + + /* Handle freeing the locks from within the stack. */ + + while (--i >= 0) { + autoinc_lock = ib_vector_get(trx->autoinc_locks, i); + + if (UNIV_LIKELY(autoinc_lock == lock)) { + ib_vector_set(trx->autoinc_locks, i, NULL); + return; + } + } + + /* Must find the autoinc lock. */ + ut_error; + } +} + /*************************************************************//** Removes a table lock request from the queue and the trx list of locks; this is a low-level function which does NOT check if waiting requests @@ -3670,10 +3744,8 @@ lock_table_remove_low( if (!lock_get_wait(lock) && !ib_vector_is_empty(trx->autoinc_locks)) { - lock_t* autoinc_lock; - autoinc_lock = ib_vector_pop(trx->autoinc_locks); - ut_a(autoinc_lock == lock); + lock_table_remove_autoinc_lock(lock, trx); } ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); diff --git a/log/log0recv.c b/log/log0recv.c index 5702f47ad5f..8ef24995b09 100644 --- a/log/log0recv.c +++ b/log/log0recv.c @@ -2260,7 +2260,7 @@ recv_report_corrupt_log( "InnoDB: far enough in recovery! Please run CHECK TABLE\n" "InnoDB: on your InnoDB tables to check that they are ok!\n" "InnoDB: If mysqld crashes after this recovery, look at\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); fflush(stderr); diff --git a/mem/mem0mem.c b/mem/mem0mem.c index 1dd4db30841..86100b04fd6 100644 --- a/mem/mem0mem.c +++ b/mem/mem0mem.c @@ -347,7 +347,7 @@ mem_heap_create_block( return(NULL); } } else { - buf_block = buf_block_alloc(0); + buf_block = buf_block_alloc(); } block = (mem_block_t*) buf_block->frame; diff --git a/mtr/mtr0log.c b/mtr/mtr0log.c index 3f3dab36b76..3349036b5b3 100644 --- a/mtr/mtr0log.c +++ b/mtr/mtr0log.c @@ -408,7 +408,7 @@ mlog_parse_string( ptr += 2; if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(len + offset) > UNIV_PAGE_SIZE) { + || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) { recv_sys->found_corrupt_log = TRUE; return(NULL); diff --git a/mtr/mtr0mtr.c b/mtr/mtr0mtr.c index 44d11fe5b1d..34e6d3ffc92 100644 --- a/mtr/mtr0mtr.c +++ b/mtr/mtr0mtr.c @@ -33,6 +33,7 @@ Created 11/26/1995 Heikki Tuuri #include "page0types.h" #include "mtr0log.h" #include "log0log.h" +#include "buf0flu.h" #ifndef UNIV_HOTBACKUP # include "log0recv.h" diff --git a/os/os0file.c b/os/os0file.c index 7bb9440b184..31dec031af3 100644 --- a/os/os0file.c +++ b/os/os0file.c @@ -142,6 +142,7 @@ struct os_aio_slot_struct{ // made and only the slot message // needs to be passed to the caller // of os_aio_simulated_handle */ + ulint space_id; fil_node_t* message1; /*!< message which is given by the */ void* message2; /*!< the requester of an aio operation and which can be used to identify @@ -3391,7 +3392,7 @@ os_aio_array_reserve_slot( ulint offset_high, /*!< in: most significant 32 bits of offset */ ulint len, /*!< in: length of the block to read or write */ - trx_t* trx) + ulint space_id) { os_aio_slot_t* slot; ulint i; @@ -3473,6 +3474,7 @@ found: slot->offset_high = offset_high; // slot->io_already_done = FALSE; slot->status = OS_AIO_NOT_ISSUED; + slot->space_id = space_id; #ifdef WIN_ASYNC_IO control = &(slot->control); @@ -3681,6 +3683,7 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint space_id, trx_t* trx) { os_aio_array_t* array; @@ -3763,7 +3766,7 @@ try_again: trx->io_read += n; } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, offset_high, n, trx); + name, buf, offset, offset_high, n, space_id); if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO @@ -3873,7 +3876,8 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id) { ulint orig_seg = segment; os_aio_array_t* array; @@ -3927,6 +3931,7 @@ os_aio_windows_handle( *message2 = slot->message2; *type = slot->type; + *space_id = slot->space_id; if (ret && len == slot->len) { ret_val = TRUE; @@ -4010,7 +4015,8 @@ os_aio_simulated_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id) { os_aio_array_t* array; ulint segment; @@ -4301,6 +4307,7 @@ slot_io_done: *message2 = slot->message2; *type = slot->type; + *space_id = slot->space_id; os_mutex_exit(array->mutex); diff --git a/page/page0cur.c b/page/page0cur.c index fa3d2532deb..f10f16a7dd9 100644 --- a/page/page0cur.c +++ b/page/page0cur.c @@ -564,74 +564,6 @@ page_cur_open_on_rnd_user_rec( } while (rnd--); } -UNIV_INTERN -void -page_cur_open_on_nth_user_rec( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth) -{ - ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); - - page_cur_set_before_first(block, cursor); - - if (UNIV_UNLIKELY(n_recs == 0)) { - - return; - } - - nth--; - - if (nth >= n_recs) { - nth = n_recs - 1; - } - - do { - page_cur_move_to_next(cursor); - } while (nth--); -} - -UNIV_INTERN -ibool -page_cur_open_on_rnd_user_rec_after_nth( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth) -{ - ulint rnd; - ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); - ibool ret; - - page_cur_set_before_first(block, cursor); - - if (UNIV_UNLIKELY(n_recs == 0)) { - - return (FALSE); - } - - nth--; - - if (nth >= n_recs) { - nth = n_recs - 1; - } - - rnd = (ulint) (nth + page_cur_lcg_prng() % (n_recs - nth)); - - if (rnd == nth) { - ret = TRUE; - } else { - ret = FALSE; - } - - do { - page_cur_move_to_next(cursor); - } while (rnd--); - - return (ret); -} - /***********************************************************//** Writes the log record of a record insert on a page. */ static diff --git a/page/page0zip.c b/page/page0zip.c index a94d2d54417..5b4f5d3b76a 100644 --- a/page/page0zip.c +++ b/page/page0zip.c @@ -4443,7 +4443,7 @@ page_zip_reorganize( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(0); + temp_block = buf_block_alloc(); btr_search_drop_page_hash_index(block); block->check_index_page_at_flush = TRUE; #else /* !UNIV_HOTBACKUP */ diff --git a/percona-suite/innodb_fix_misc_bug51325.result b/percona-suite/innodb_fix_misc_bug51325.result new file mode 100644 index 00000000000..87cd4a2971a --- /dev/null +++ b/percona-suite/innodb_fix_misc_bug51325.result @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t1; +SET GLOBAL innodb_file_per_table=ON; +SHOW VARIABLES LIKE 'innodb_lazy_drop_table'; +Variable_name Value +innodb_lazy_drop_table 0 +SET GLOBAL innodb_lazy_drop_table=1; +SHOW VARIABLES LIKE 'innodb_lazy_drop_table'; +Variable_name Value +innodb_lazy_drop_table 1 +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +DROP TABLE t1; +SET GLOBAL innodb_lazy_drop_table=default; +SET GLOBAL innodb_file_per_table=default; diff --git a/percona-suite/innodb_fix_misc_bug51325.test b/percona-suite/innodb_fix_misc_bug51325.test new file mode 100644 index 00000000000..78d6e60046a --- /dev/null +++ b/percona-suite/innodb_fix_misc_bug51325.test @@ -0,0 +1,13 @@ +# Test for 'innodb_lazy_drop_table' variable +--source include/have_innodb.inc +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings +SET GLOBAL innodb_file_per_table=ON; +SHOW VARIABLES LIKE 'innodb_lazy_drop_table'; +SET GLOBAL innodb_lazy_drop_table=1; +SHOW VARIABLES LIKE 'innodb_lazy_drop_table'; +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +DROP TABLE t1; +SET GLOBAL innodb_lazy_drop_table=default; +SET GLOBAL innodb_file_per_table=default; diff --git a/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.result b/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.result index a028aeadde4..15cefd67c1e 100644 --- a/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.result +++ b/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.result @@ -1,9 +1,5 @@ -stop slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -reset master; -reset slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -start slave; +include/master-slave.inc +[connection master] DROP TABLE IF EXISTS t; CREATE TABLE t(id INT); INSERT INTO t SELECT SLEEP(10); @@ -15,7 +11,11 @@ master 1 slave count(*) slave 0 SHOW SLAVE STATUS NOLOCK; +include/wait_for_slave_to_stop.inc START SLAVE; +include/wait_for_slave_to_start.inc slave count(*) slave 1 DROP TABLE t; +STOP SLAVE; +include/wait_for_slave_to_stop.inc diff --git a/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.test b/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.test index 9e856d24956..d8ae98262ea 100644 --- a/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.test +++ b/percona-suite/show_slave_status_nolock.patch/percona_show_slave_status_nolock.test @@ -45,3 +45,6 @@ connection slave; connection master; DROP TABLE t; sync_slave_with_master; + +STOP SLAVE; +--source include/wait_for_slave_to_stop.inc diff --git a/percona-suite/slow_extended.patch/percona_slow_extended-slave_innodb_stats.result b/percona-suite/slow_extended.patch/percona_slow_extended-slave_innodb_stats.result index 162c92afbd9..c2292c4d2ac 100644 --- a/percona-suite/slow_extended.patch/percona_slow_extended-slave_innodb_stats.result +++ b/percona-suite/slow_extended.patch/percona_slow_extended-slave_innodb_stats.result @@ -1,9 +1,5 @@ -stop slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -reset master; -reset slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -start slave; +include/master-slave.inc +[connection master] DROP TABLE IF EXISTS t; CREATE TABLE t(id INT,data CHAR(30)) ENGINE=InnoDB; INSERT INTO t VALUES @@ -15,7 +11,9 @@ INSERT INTO t VALUES INSERT INTO t SELECT t2.id,t2.data from t as t1, t as t2; INSERT INTO t SELECT t2.id,t2.data from t as t1, t as t2; STOP SLAVE; +include/wait_for_slave_to_stop.inc START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t SELECT t.id,t.data from t; DROP TABLE IF EXISTS t; FLUSH LOGS; diff --git a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.result b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.result index 386faf225ec..e96126e34c7 100644 --- a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.result +++ b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.result @@ -1,15 +1,12 @@ # Activate master-slave replication -stop slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -reset master; -reset slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -start slave; +include/master-slave.inc +[connection master] # Make table t for test DROP TABLE IF EXISTS t; CREATE TABLE t(id INT); # Start slave replication START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t VALUES (1); # Read and change log_slow_slave_statements to ON on slave show variables like 'log_slow_slave_statements'; @@ -22,7 +19,9 @@ log_slow_slave_statements ON INSERT INTO t VALUES (2); # Restart slave STOP SLAVE; +include/wait_for_slave_to_stop.inc START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t VALUES (3); show variables like 'long_query_time'; Variable_name Value @@ -91,3 +90,5 @@ FLUSH LOGS; 1 set global log_slow_slave_statements=OFF; DROP TABLE t; +STOP SLAVE; +include/wait_for_slave_to_stop.inc diff --git a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.test b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.test index b5a6154bbfc..755b83fa93b 100644 --- a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.test +++ b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements-and-use_global_long_query_time.test @@ -111,3 +111,6 @@ DROP TABLE t; sync_slave_with_master; connection slave; + +STOP SLAVE; +-- source include/wait_for_slave_to_stop.inc diff --git a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.result b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.result index 9f9085ff444..6563d6d863e 100644 --- a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.result +++ b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.result @@ -1,15 +1,12 @@ # Activate master-slave replication -stop slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -reset master; -reset slave; -drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; -start slave; +include/master-slave.inc +[connection master] # Make table t for test DROP TABLE IF EXISTS t; CREATE TABLE t(id INT); # Start slave replication START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t VALUES (1); # Read information about master binlog # Sync(1) slave thread @@ -26,7 +23,9 @@ INSERT INTO t VALUES (2); # Sync slave(2) thread # Restart slave STOP SLAVE; +include/wait_for_slave_to_stop.inc START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t VALUES (3); # Read information about master binlog # Sync(3) slave thread @@ -43,7 +42,9 @@ INSERT INTO t VALUES (4); # Sync slave(4) thread # Restart slave STOP SLAVE; +include/wait_for_slave_to_stop.inc START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t VALUES (5); # Read information about master binlog # Sync slave(5) thread @@ -60,7 +61,9 @@ INSERT INTO t VALUES (6); # Sync slave(6) thread # Restart slave STOP SLAVE; +include/wait_for_slave_to_stop.inc START SLAVE; +include/wait_for_slave_to_start.inc INSERT INTO t VALUES (7); # Read information about master binlog # Sync slave(7) thread @@ -86,3 +89,5 @@ set global log_slow_slave_statements=OFF; DROP TABLE t; # Read information about master binlog # Sync slave(8) thread +STOP SLAVE; +include/wait_for_slave_to_stop.inc diff --git a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.test b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.test index bce7a5f0530..bdf37533933 100644 --- a/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.test +++ b/percona-suite/slow_extended.patch/percona_slow_extended-slave_statements.test @@ -163,3 +163,6 @@ let $binlog_position = query_get_value(SHOW MASTER STATUS,Position,1); -- echo # Sync slave(8) thread connection slave; let $sync_result = `SELECT MASTER_POS_WAIT('$binlog_file',$binlog_position)`; + +STOP SLAVE; +-- source include/wait_for_slave_to_stop.inc diff --git a/percona-suite/userstat_bug602047.result b/percona-suite/userstat_bug602047.result new file mode 100644 index 00000000000..bc14ca3362a --- /dev/null +++ b/percona-suite/userstat_bug602047.result @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t1; +SET @userstat_running_old= @@userstat_running; +SET GLOBAL userstat_running=ON; +CREATE TABLE t1 ( id int(10), PRIMARY KEY (id)) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); +SELECT COUNT(*) FROM t1; +COUNT(*) +10 +SELECT ROWS_READ FROM information_schema.table_statistics WHERE TABLE_NAME='t1'; +ROWS_READ +10 +SELECT ROWS_READ FROM information_schema.index_statistics WHERE TABLE_NAME='t1'; +ROWS_READ +10 +SET GLOBAL userstat_running= @userstat_running_old; +DROP TABLE t1; diff --git a/percona-suite/userstat_bug602047.test b/percona-suite/userstat_bug602047.test new file mode 100644 index 00000000000..37c08d32bf0 --- /dev/null +++ b/percona-suite/userstat_bug602047.test @@ -0,0 +1,13 @@ +--source include/have_innodb.inc +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings +SET @userstat_running_old= @@userstat_running; +SET GLOBAL userstat_running=ON; +CREATE TABLE t1 ( id int(10), PRIMARY KEY (id)) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); +SELECT COUNT(*) FROM t1; +SELECT ROWS_READ FROM information_schema.table_statistics WHERE TABLE_NAME='t1'; +SELECT ROWS_READ FROM information_schema.index_statistics WHERE TABLE_NAME='t1'; +SET GLOBAL userstat_running= @userstat_running_old; +DROP TABLE t1; diff --git a/plug.in b/plug.in index 4a621361671..427cc87eeb5 100644 --- a/plug.in +++ b/plug.in @@ -17,6 +17,8 @@ MYSQL_STORAGE_ENGINE(innodb_plugin,, [InnoDB Storage Engine], [Transactional Tables using InnoDB], [max,max-no-ndb]) MYSQL_PLUGIN_DIRECTORY(innodb_plugin, [storage/innodb_plugin]) +# Enable if you know what you are doing (trying to link both InnoDB and +# InnoDB Plugin statically into MySQL does not work). MYSQL_PLUGIN_STATIC(innodb_plugin, [libinnobase.a]) MYSQL_PLUGIN_DYNAMIC(innodb_plugin, [ha_innodb_plugin.la]) MYSQL_PLUGIN_ACTIONS(innodb_plugin, [ @@ -137,17 +139,24 @@ MYSQL_PLUGIN_ACTIONS(innodb_plugin, [ ) AC_MSG_CHECKING(whether Solaris libc atomic functions are available) - # either define HAVE_IB_SOLARIS_ATOMICS or not - AC_CHECK_FUNCS(atomic_cas_ulong \ + # Define HAVE_IB_SOLARIS_ATOMICS if _all_ of the following + # functions are present. + AC_CHECK_FUNCS(atomic_add_long_nv \ atomic_cas_32 \ atomic_cas_64 \ - atomic_add_long_nv \ - atomic_swap_uchar, + atomic_cas_ulong \ + atomic_swap_uchar) - AC_DEFINE([HAVE_IB_SOLARIS_ATOMICS], [1], - [Define to 1 if Solaris libc atomic functions \ - are available]) - ) + if test "${ac_cv_func_atomic_add_long_nv}" = "yes" -a \ + "${ac_cv_func_atomic_cas_32}" = "yes" -a \ + "${ac_cv_func_atomic_cas_64}" = "yes" -a \ + "${ac_cv_func_atomic_cas_ulong}" = "yes" -a \ + "${ac_cv_func_atomic_swap_uchar}" = "yes" ; then + + AC_DEFINE([HAVE_IB_SOLARIS_ATOMICS], [1], + [Define to 1 if Solaris libc atomic functions are available] + ) + fi AC_MSG_CHECKING(whether pthread_t can be used by Solaris libc atomic functions) # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not diff --git a/rem/rem0cmp.c b/rem/rem0cmp.c index 8ee434f85da..04d2c15437b 100644 --- a/rem/rem0cmp.c +++ b/rem/rem0cmp.c @@ -862,15 +862,18 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current comparison */ - ulint* matched_bytes, /*!< in/out: number of already matched + ulint* matched_bytes) /*!< in/out: number of already matched bytes within the first field not completely matched; when the function returns, contains the value for the current comparison */ - ulint stats_method) { ulint rec1_n_fields; /* the number of fields in rec */ ulint rec1_f_len; /* length of current field in rec */ @@ -962,13 +965,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - if (stats_method == SRV_STATS_METHOD_NULLS_EQUAL) { - goto next_field; - } else { + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { ret = -1; + } else { + goto next_field; } - } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/row/row0ins.c b/row/row0ins.c index 3372e1480b5..efcea62f212 100644 --- a/row/row0ins.c +++ b/row/row0ins.c @@ -2026,6 +2026,8 @@ row_ins_index_entry_low( } #ifdef UNIV_DEBUG + if (!srv_use_sys_stats_table + || index != UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { page_t* page = btr_cur_get_page(&cursor); rec_t* first_rec = page_rec_get_next( @@ -2136,7 +2138,7 @@ function_exit: err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr); + rec, offsets, &mtr, FALSE, big_rec); if (modify) { dtuple_big_rec_free(big_rec); diff --git a/row/row0merge.c b/row/row0merge.c index 402a168f5dd..782c029fbcc 100644 --- a/row/row0merge.c +++ b/row/row0merge.c @@ -2350,7 +2350,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - char old_name[MAX_TABLE_NAME_LEN + 1]; + char old_name[MAX_FULL_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(old_table != new_table); @@ -2365,7 +2365,7 @@ row_merge_rename_tables( ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: too long table name: '%s', " "max length is %d\n", old_table->name, - MAX_TABLE_NAME_LEN); + MAX_FULL_NAME_LEN); ut_error; } diff --git a/row/row0mysql.c b/row/row0mysql.c index 4639dd4aaf7..2728b3d66d9 100644 --- a/row/row0mysql.c +++ b/row/row0mysql.c @@ -51,6 +51,7 @@ Created 9/17/2000 Heikki Tuuri #include "btr0sea.h" #include "fil0fil.h" #include "ibuf0ibuf.h" +#include "ha_prototypes.h" /** Provide optional 4.x backwards compatibility for 5.0 and above */ UNIV_INTERN ibool row_rollback_on_timeout = FALSE; @@ -573,7 +574,7 @@ handle_new_error: "InnoDB: If the mysqld server crashes" " after the startup or when\n" "InnoDB: you dump the tables, look at\n" - "InnoDB: " REFMAN "forcing-recovery.html" + "InnoDB: " REFMAN "forcing-innodb-recovery.html" " for help.\n", stderr); break; case DB_FOREIGN_EXCEED_MAX_CASCADE: @@ -2094,6 +2095,32 @@ row_insert_stats_for_mysql( return((int) err); } +/*********************************************************************//** +*/ +UNIV_INTERN +int +row_delete_stats_for_mysql( +/*=============================*/ + dict_index_t* index, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + + trx->op_info = "delete rows from SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + pars_info_add_dulint_literal(info, "indexid", index->id); + + return((int) que_eval_sql(info, + "PROCEDURE DELETE_STATISTICS_PROC () IS\n" + "BEGIN\n" + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" + "END;\n" + , TRUE, trx)); +} + /*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function diff --git a/row/row0purge.c b/row/row0purge.c index 31b255cf2d4..752a2ec9e83 100644 --- a/row/row0purge.c +++ b/row/row0purge.c @@ -387,8 +387,11 @@ Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void -row_purge_upd_exist_or_extern( -/*==========================*/ +row_purge_upd_exist_or_extern_func( +/*===============================*/ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; @@ -413,8 +416,8 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(NULL, node->index, - node->update)) { + if (row_upd_changes_ord_field_binary(node->index, node->update, + thr, NULL, NULL)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); @@ -496,6 +499,14 @@ skip_secondaries: } } +#ifdef UNIV_DEBUG +# define row_purge_upd_exist_or_extern(thr,node) \ + row_purge_upd_exist_or_extern_func(thr,node) +#else /* UNIV_DEBUG */ +# define row_purge_upd_exist_or_extern(thr,node) \ + row_purge_upd_exist_or_extern_func(node) +#endif /* UNIV_DEBUG */ + /***********************************************************//** Parses the row reference and other info in a modify undo log record. @return TRUE if purge operation required: NOTE that then the CALLER @@ -602,47 +613,32 @@ err_exit: /***********************************************************//** Fetches an undo log record and does the purge for the recorded operation. If none left, or the current purge completed, returns the control to the -parent node, which is always a query thread node. -@return DB_SUCCESS if operation successfully completed, else error code */ -static -ulint +parent node, which is always a query thread node. */ +static __attribute__((nonnull)) +void row_purge( /*======*/ purge_node_t* node, /*!< in: row purge node */ que_thr_t* thr) /*!< in: query thread */ { - roll_ptr_t roll_ptr; - ibool purge_needed; ibool updated_extern; - trx_t* trx; - ut_ad(node && thr); + ut_ad(node); + ut_ad(thr); - trx = thr_get_trx(thr); - - node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr, - &(node->reservation), + node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr, + &node->reservation, node->heap); if (!node->undo_rec) { /* Purge completed for this query thread */ thr->run_node = que_node_get_parent(node); - return(DB_SUCCESS); + return; } - node->roll_ptr = roll_ptr; - - if (node->undo_rec == &trx_purge_dummy_rec) { - purge_needed = FALSE; - } else { - purge_needed = row_purge_parse_undo_rec(node, &updated_extern, - thr); - /* If purge_needed == TRUE, we must also remember to unfreeze - data dictionary! */ - } - - if (purge_needed) { + if (node->undo_rec != &trx_purge_dummy_rec + && row_purge_parse_undo_rec(node, &updated_extern, thr)) { node->found_clust = FALSE; node->index = dict_table_get_next_index( @@ -654,14 +650,14 @@ row_purge( } else if (updated_extern || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - row_purge_upd_exist_or_extern(node); + row_purge_upd_exist_or_extern(thr, node); } if (node->found_clust) { btr_pcur_close(&(node->pcur)); } - row_mysql_unfreeze_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); } /* Do some cleanup */ @@ -669,8 +665,6 @@ row_purge( mem_heap_empty(node->heap); thr->run_node = node; - - return(DB_SUCCESS); } /***********************************************************//** @@ -684,9 +678,6 @@ row_purge_step( que_thr_t* thr) /*!< in: query thread */ { purge_node_t* node; -#ifdef UNIV_DEBUG - ulint err; -#endif /* UNIV_DEBUG */ ut_ad(thr); @@ -694,12 +685,7 @@ row_purge_step( ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); -#ifdef UNIV_DEBUG - err = -#endif /* UNIV_DEBUG */ row_purge(node, thr); - ut_ad(err == DB_SUCCESS); - return(thr); } diff --git a/row/row0row.c b/row/row0row.c index 8e806a14a98..0783d482f76 100644 --- a/row/row0row.c +++ b/row/row0row.c @@ -347,6 +347,14 @@ row_rec_to_index_entry_low( rec_len = rec_offs_n_fields(offsets); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + if (rec_len < dict_index_get_n_fields(index)) { + /* the new record should be extended */ + rec_len = dict_index_get_n_fields(index); + } + } + entry = dtuple_create(heap, rec_len); dtuple_set_n_fields_cmp(entry, @@ -358,6 +366,14 @@ row_rec_to_index_entry_low( for (i = 0; i < rec_len; i++) { dfield = dtuple_get_nth_field(entry, i); + + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && i >= rec_offs_n_fields(offsets)) { + dfield_set_null(dfield); + continue; + } + field = rec_get_nth_field(rec, offsets, i, &len); dfield_set_data(dfield, field, len); diff --git a/row/row0umod.c b/row/row0umod.c index 5998dadd16d..5202a498eed 100644 --- a/row/row0umod.c +++ b/row/row0umod.c @@ -173,40 +173,26 @@ row_undo_mod_remove_clust_low( mtr_t* mtr, /*!< in: mtr */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { - btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; - ibool success; ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - pcur = &(node->pcur); - btr_cur = btr_pcur_get_btr_cur(pcur); - success = btr_pcur_restore_position(mode, pcur, mtr); + /* Find out if the record has been purged already + or if we can remove it. */ - if (!success) { + if (!btr_pcur_restore_position(mode, &node->pcur, mtr) + || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { return(DB_SUCCESS); } - /* Find out if we can remove the whole clustered index record */ - - if (node->rec_type == TRX_UNDO_UPD_DEL_REC - && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { - - /* Ok, we can remove */ - } else { - return(DB_SUCCESS); - } + btr_cur = btr_pcur_get_btr_cur(&node->pcur); if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete(btr_cur, mtr); - - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } + err = btr_cur_optimistic_delete(btr_cur, mtr) + ? DB_SUCCESS + : DB_FAIL; } else { ut_ad(mode == BTR_MODIFY_TREE); @@ -668,19 +654,19 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { + if (row_upd_changes_ord_field_binary(node->index, node->update, + thr, + node->row, node->ext)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The server must have crashed in - row_upd_clust_rec_by_insert(), in - row_ins_index_entry_low() before - btr_store_big_rec_extern_fields() - has written the externally stored columns - (BLOBs) of the new clustered index entry. */ + row_upd_clust_rec_by_insert() before + the updated externally stored columns (BLOBs) + of the new clustered index entry were + written. */ /* The table must be in DYNAMIC or COMPRESSED format. REDUNDANT and COMPACT formats diff --git a/row/row0upd.c b/row/row0upd.c index 0dc550b93f5..ba4096f8d4d 100644 --- a/row/row0upd.c +++ b/row/row0upd.c @@ -439,6 +439,12 @@ row_upd_changes_field_size_or_external( 0); } + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && upd_field->field_no >= rec_offs_n_fields(offsets)) { + return(TRUE); + } + old_len = rec_offs_nth_size(offsets, upd_field->field_no); if (rec_offs_comp(offsets) @@ -844,6 +850,18 @@ row_upd_build_difference_binary( for (i = 0; i < dtuple_get_n_fields(entry); i++) { + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && i >= rec_offs_n_fields(offsets)) { + dfield = dtuple_get_nth_field(entry, i); + + upd_field = upd_get_nth_field(update, n_diff); + dfield_copy(&(upd_field->new_val), dfield); + upd_field_set_field_no(upd_field, i, index, trx); + n_diff++; + goto skip_compare; + } + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); @@ -1192,26 +1210,33 @@ NOTE: we compare the fields as binary strings! @return TRUE if update vector changes an ordering field in the index record */ UNIV_INTERN ibool -row_upd_changes_ord_field_binary( -/*=============================*/ +row_upd_changes_ord_field_binary_func( +/*==================================*/ + dict_index_t* index, /*!< in: index of the record */ + const upd_t* update, /*!< in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ const dtuple_t* row, /*!< in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at compile time */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update) /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ + const row_ext_t*ext) /*!< NULL, or prefixes of the externally + stored columns in the old row */ { - ulint n_unique; - ulint n_upd_fields; - ulint i, j; - dict_index_t* clust_index; + ulint n_unique; + ulint i; + const dict_index_t* clust_index; - ut_ad(update && index); + ut_ad(index); + ut_ad(update); + ut_ad(thr); + ut_ad(thr->graph); + ut_ad(thr->graph->trx); n_unique = dict_index_get_n_unique(index); - n_upd_fields = upd_get_n_fields(update); clust_index = dict_table_get_first_index(index->table); @@ -1219,33 +1244,81 @@ row_upd_changes_ord_field_binary( const dict_field_t* ind_field; const dict_col_t* col; - ulint col_pos; ulint col_no; + const upd_field_t* upd_field; + const dfield_t* dfield; + dfield_t dfield_ext; + ulint dfield_len; + const byte* buf; ind_field = dict_index_get_nth_field(index, i); col = dict_field_get_col(ind_field); - col_pos = dict_col_get_clust_pos(col, clust_index); col_no = dict_col_get_no(col); - for (j = 0; j < n_upd_fields; j++) { + upd_field = upd_get_field_by_field_no( + update, dict_col_get_clust_pos(col, clust_index)); - const upd_field_t* upd_field - = upd_get_nth_field(update, j); + if (upd_field == NULL) { + continue; + } - /* Note that if the index field is a column prefix - then it may be that row does not contain an externally - stored part of the column value, and we cannot compare - the datas */ + if (row == NULL) { + ut_ad(ext == NULL); + return(TRUE); + } - if (col_pos == upd_field->field_no - && (row == NULL - || ind_field->prefix_len > 0 - || !dfield_datas_are_binary_equal( - dtuple_get_nth_field(row, col_no), - &(upd_field->new_val)))) { + dfield = dtuple_get_nth_field(row, col_no); - return(TRUE); + /* This treatment of column prefix indexes is loosely + based on row_build_index_entry(). */ + + if (UNIV_LIKELY(ind_field->prefix_len == 0) + || dfield_is_null(dfield)) { + /* do nothing special */ + } else if (UNIV_LIKELY_NULL(ext)) { + /* Silence a compiler warning without + silencing a Valgrind error. */ + dfield_len = 0; + UNIV_MEM_INVALID(&dfield_len, sizeof dfield_len); + /* See if the column is stored externally. */ + buf = row_ext_lookup(ext, col_no, &dfield_len); + + ut_ad(col->ord_part); + + if (UNIV_LIKELY_NULL(buf)) { + if (UNIV_UNLIKELY(buf == field_ref_zero)) { + /* The externally stored field + was not written yet. This + record should only be seen by + recv_recovery_rollback_active(), + when the server had crashed before + storing the field. */ + ut_ad(thr->graph->trx->is_recovered); + ut_ad(trx_is_recv(thr->graph->trx)); + return(TRUE); + } + + goto copy_dfield; } + } else if (dfield_is_ext(dfield)) { + dfield_len = dfield_get_len(dfield); + ut_a(dfield_len > BTR_EXTERN_FIELD_REF_SIZE); + dfield_len -= BTR_EXTERN_FIELD_REF_SIZE; + ut_a(dict_index_is_clust(index) + || ind_field->prefix_len <= dfield_len); + buf = dfield_get_data(dfield); +copy_dfield: + ut_a(dfield_len > 0); + dfield_copy(&dfield_ext, dfield); + dfield_set_data(&dfield_ext, buf, dfield_len); + dfield = &dfield_ext; + } + + if (!dfield_datas_are_binary_equal( + dfield, &upd_field->new_val, + ind_field->prefix_len)) { + + return(TRUE); } } @@ -1329,7 +1402,7 @@ row_upd_changes_first_fields_binary( if (col_pos == upd_field->field_no && !dfield_datas_are_binary_equal( dtuple_get_nth_field(entry, i), - &(upd_field->new_val))) { + &upd_field->new_val, 0)) { return(TRUE); } @@ -1568,14 +1641,99 @@ row_upd_sec_step( ut_ad(!dict_index_is_clust(node->index)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { + || row_upd_changes_ord_field_binary(node->index, node->update, + thr, node->row, node->ext)) { return(row_upd_sec_index_entry(node, thr)); } return(DB_SUCCESS); } +#ifdef UNIV_DEBUG +# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update) +#else /* UNIV_DEBUG */ +# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(entry,update) +#endif /* UNIV_DEBUG */ +/*******************************************************************//** +Mark non-updated off-page columns inherited when the primary key is +updated. We must mark them as inherited in entry, so that they are not +freed in a rollback. A limited version of this function used to be +called btr_cur_mark_dtuple_inherited_extern(). +@return TRUE if any columns were inherited */ +static __attribute__((warn_unused_result)) +ibool +row_upd_clust_rec_by_insert_inherit_func( +/*=====================================*/ +#ifdef UNIV_DEBUG + const rec_t* rec, /*!< in: old record, or NULL */ + const ulint* offsets,/*!< in: rec_get_offsets(rec), or NULL */ +#endif /* UNIV_DEBUG */ + dtuple_t* entry, /*!< in/out: updated entry to be + inserted into the clustered index */ + const upd_t* update) /*!< in: update vector */ +{ + ibool inherit = FALSE; + ulint i; + + ut_ad(!rec == !offsets); + ut_ad(!rec || rec_offs_any_extern(offsets)); + + for (i = 0; i < dtuple_get_n_fields(entry); i++) { + dfield_t* dfield = dtuple_get_nth_field(entry, i); + byte* data; + ulint len; + + ut_ad(!offsets + || !rec_offs_nth_extern(offsets, i) + == !dfield_is_ext(dfield) + || upd_get_field_by_field_no(update, i)); + if (!dfield_is_ext(dfield) + || upd_get_field_by_field_no(update, i)) { + continue; + } + +#ifdef UNIV_DEBUG + if (UNIV_LIKELY(rec != NULL)) { + const byte* rec_data + = rec_get_nth_field(rec, offsets, i, &len); + ut_ad(len == dfield_get_len(dfield)); + ut_ad(len != UNIV_SQL_NULL); + ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); + + rec_data += len - BTR_EXTERN_FIELD_REF_SIZE; + + /* The pointer must not be zero. */ + ut_ad(memcmp(rec_data, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* The BLOB must be owned. */ + ut_ad(!(rec_data[BTR_EXTERN_LEN] + & BTR_EXTERN_OWNER_FLAG)); + } +#endif /* UNIV_DEBUG */ + + len = dfield_get_len(dfield); + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + data = dfield_get_data(dfield); + data += len - BTR_EXTERN_FIELD_REF_SIZE; + /* The pointer must not be zero. */ + ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); + /* The BLOB must be owned. */ + ut_a(!(data[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + + data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG; + /* The BTR_EXTERN_INHERITED_FLAG only matters in + rollback. Purge will always free the extern fields of + a delete-marked row. */ + + inherit = TRUE; + } + + return(inherit); +} + /***********************************************************//** Marks the clustered index record deleted and inserts the updated version of the record to the index. This function should be used when the ordering @@ -1594,14 +1752,16 @@ row_upd_clust_rec_by_insert( a foreign key constraint */ mtr_t* mtr) /*!< in/out: mtr; gets committed here */ { - mem_heap_t* heap = NULL; + mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* btr_cur; trx_t* trx; dict_table_t* table; dtuple_t* entry; ulint err; - ibool change_ownership = FALSE; + ibool change_ownership = FALSE; + rec_t* rec; + ulint* offsets = NULL; ut_ad(node); ut_ad(dict_index_is_clust(index)); @@ -1611,53 +1771,7 @@ row_upd_clust_rec_by_insert( pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); - if (node->state != UPD_NODE_INSERT_CLUSTERED) { - rec_t* rec; - dict_index_t* index; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets; - rec_offs_init(offsets_); - - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - return(err); - } - - /* Mark as not-owned the externally stored fields which the new - row inherits from the delete marked record: purge should not - free those externally stored fields even if the delete marked - record is removed from the index tree, or updated. */ - - rec = btr_cur_get_rec(btr_cur); - index = dict_table_get_first_index(table); - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - change_ownership = btr_cur_mark_extern_inherited_fields( - btr_cur_get_page_zip(btr_cur), rec, index, offsets, - node->update, mtr); - if (check_ref) { - /* NOTE that the following call loses - the position of pcur ! */ - err = row_upd_check_references_constraints( - node, pcur, table, index, offsets, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - } - } - - mtr_commit(mtr); - - if (!heap) { - heap = mem_heap_create(500); - } - node->state = UPD_NODE_INSERT_CLUSTERED; + heap = mem_heap_create(1000); entry = row_build_index_entry(node->upd_row, node->upd_ext, index, heap); @@ -1665,23 +1779,104 @@ row_upd_clust_rec_by_insert( row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); - if (change_ownership) { - /* If we return from a lock wait, for example, we may have - extern fields marked as not-owned in entry (marked in the - if-branch above). We must unmark them, take the ownership - back. */ + switch (node->state) { + default: + ut_error; + case UPD_NODE_INSERT_BLOB: + /* A lock wait occurred in row_ins_index_entry() in + the previous invocation of this function. Mark the + off-page columns in the entry inherited. */ - btr_cur_unmark_dtuple_extern_fields(entry); + change_ownership = row_upd_clust_rec_by_insert_inherit( + NULL, NULL, entry, node->update); + ut_a(change_ownership); + /* fall through */ + case UPD_NODE_INSERT_CLUSTERED: + /* A lock wait occurred in row_ins_index_entry() in + the previous invocation of this function. */ + break; + case UPD_NODE_UPDATE_CLUSTERED: + /* This is the first invocation of the function where + we update the primary key. Delete-mark the old record + in the clustered index and prepare to insert a new entry. */ + rec = btr_cur_get_rec(btr_cur); + offsets = rec_get_offsets(rec, index, NULL, + ULINT_UNDEFINED, &heap); + ut_ad(page_rec_is_user_rec(rec)); - /* We must mark non-updated extern fields in entry as - inherited, so that a possible rollback will not free them. */ + err = btr_cur_del_mark_set_clust_rec( + BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur), + rec, index, offsets, TRUE, thr, mtr); + if (err != DB_SUCCESS) { +err_exit: + mtr_commit(mtr); + mem_heap_free(heap); + return(err); + } - btr_cur_mark_dtuple_inherited_extern(entry, node->update); + /* If the the new row inherits externally stored + fields (off-page columns a.k.a. BLOBs) from the + delete-marked old record, mark them disowned by the + old record and owned by the new entry. */ + + if (rec_offs_any_extern(offsets)) { + change_ownership = row_upd_clust_rec_by_insert_inherit( + rec, offsets, entry, node->update); + + if (change_ownership) { + btr_pcur_store_position(pcur, mtr); + } + } + + if (check_ref) { + /* NOTE that the following call loses + the position of pcur ! */ + err = row_upd_check_references_constraints( + node, pcur, table, index, offsets, thr, mtr); + if (err != DB_SUCCESS) { + goto err_exit; + } + } } + mtr_commit(mtr); + err = row_ins_index_entry(index, entry, node->upd_ext ? node->upd_ext->n_ext : 0, TRUE, thr); + node->state = change_ownership + ? UPD_NODE_INSERT_BLOB + : UPD_NODE_INSERT_CLUSTERED; + + if (err == DB_SUCCESS && change_ownership) { + /* Mark the non-updated fields disowned by the old record. */ + + /* NOTE: this transaction has an x-lock on the record + and therefore other transactions cannot modify the + record when we have no latch on the page. In addition, + we assume that other query threads of the same + transaction do not modify the record in the meantime. + Therefore we can assert that the restoration of the + cursor succeeds. */ + + mtr_start(mtr); + + if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr)) { + ut_error; + } + + rec = btr_cur_get_rec(btr_cur); + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, &heap); + ut_ad(page_rec_is_user_rec(rec)); + + btr_cur_disown_inherited_fields( + btr_cur_get_page_zip(btr_cur), + rec, index, offsets, node->update, mtr); + + mtr_commit(mtr); + } + mem_heap_free(heap); return(err); @@ -1775,7 +1970,7 @@ row_upd_clust_rec( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - big_rec, mtr); + mtr, TRUE, big_rec); mtr_commit(mtr); } @@ -1825,8 +2020,9 @@ row_upd_del_mark_clust_rec( /* Mark the clustered index record deleted; we do not have to check locks, because we assume that we have an x-lock on the record */ - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); + err = btr_cur_del_mark_set_clust_rec( + BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr); if (err == DB_SUCCESS && check_ref) { /* NOTE that the following call loses the position of pcur ! */ @@ -1973,7 +2169,8 @@ exit_func: row_upd_store_row(node); - if (row_upd_changes_ord_field_binary(node->row, index, node->update)) { + if (row_upd_changes_ord_field_binary(index, node->update, thr, + node->row, node->ext)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform @@ -2042,7 +2239,8 @@ row_upd( } if (node->state == UPD_NODE_UPDATE_CLUSTERED - || node->state == UPD_NODE_INSERT_CLUSTERED) { + || node->state == UPD_NODE_INSERT_CLUSTERED + || node->state == UPD_NODE_INSERT_BLOB) { log_free_check(); err = row_upd_clust_step(node, thr); diff --git a/row/row0vers.c b/row/row0vers.c index b6d35363f08..d4fde0b939b 100644 --- a/row/row0vers.c +++ b/row/row0vers.c @@ -669,11 +669,15 @@ row_vers_build_for_semi_consistent_read( mutex_enter(&kernel_mutex); version_trx = trx_get_on_id(version_trx_id); + if (version_trx + && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY + || version_trx->conc_state == TRX_NOT_STARTED)) { + + version_trx = NULL; + } mutex_exit(&kernel_mutex); - if (!version_trx - || version_trx->conc_state == TRX_NOT_STARTED - || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) { + if (!version_trx) { /* We found a version that belongs to a committed transaction: return it. */ diff --git a/srv/srv0srv.c b/srv/srv0srv.c index 0a84d91e9af..0b68574317f 100644 --- a/srv/srv0srv.c +++ b/srv/srv0srv.c @@ -269,6 +269,11 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ UNIV_INTERN ulint srv_data_written = 0; @@ -388,7 +393,6 @@ UNIV_INTERN ibool srv_innodb_status = FALSE; /* When estimating number of different key values in an index, sample this many index pages */ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; -UNIV_INTERN ulint srv_stats_method = 0; UNIV_INTERN ulint srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; @@ -419,6 +423,8 @@ UNIV_INTERN ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */ UNIV_INTERN ulint srv_extra_rsegments = 0; /* extra rseg for users */ UNIV_INTERN ulint srv_dict_size_limit = 0; + +UNIV_INTERN ulint srv_lazy_drop_table = 0; /*-------------------------------------------*/ UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; @@ -2709,7 +2715,8 @@ srv_master_thread( unsigned space:32; unsigned offset:32; ib_uint64_t oldest_modification; - } prev_flush_info; + }; + struct t_prev_flush_info_struct prev_flush_info = {0,0,0,0}; ib_uint64_t lsn_old; @@ -2765,6 +2772,8 @@ loop: for (i = 0; i < 10; i++) { ulint cur_time = ut_time_ms(); + n_pages_flushed = 0; /* initialize */ + n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + buf_pool->stat.n_pages_written; srv_main_thread_op_info = "sleeping"; @@ -3030,7 +3039,8 @@ retry_flush_batch: if (prev_adaptive_checkpoint == 3) { lint n_flush; - lint blocks_sum, new_blocks_sum, flushed_blocks_sum; + lint blocks_sum; + ulint new_blocks_sum, flushed_blocks_sum; blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; diff --git a/sync/sync0arr.c b/sync/sync0arr.c index 223e1715944..57a288089c7 100644 --- a/sync/sync0arr.c +++ b/sync/sync0arr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -714,7 +714,7 @@ print: fprintf(stderr, "rw-lock %p ", (void*) lock); sync_array_cell_print(stderr, cell); - rw_lock_debug_print(debug); + rw_lock_debug_print(stderr, debug); return(TRUE); } } diff --git a/sync/sync0rw.c b/sync/sync0rw.c index 21e4b6b03bf..9431de15fda 100644 --- a/sync/sync0rw.c +++ b/sync/sync0rw.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -231,10 +231,10 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name) /*!< in: mutex name */ { /* If this is the very first time a synchronization object is created, then the following call initializes the sync system. */ @@ -924,7 +924,7 @@ rw_lock_list_print_info( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(file, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -972,7 +972,7 @@ rw_lock_print( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(stderr, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -984,28 +984,29 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info) /*!< in: debug struct */ { ulint rwt; rwt = info->lock_type; - fprintf(stderr, "Locked: thread %lu file %s line %lu ", + fprintf(f, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", stderr); + fputs("S-LOCK", f); } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", stderr); + fputs("X-LOCK", f); } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", stderr); + fputs("WAIT X-LOCK", f); } else { ut_error; } if (info->pass != 0) { - fprintf(stderr, " pass value %lu", (ulong) info->pass); + fprintf(f, " pass value %lu", (ulong) info->pass); } - putc('\n', stderr); + putc('\n', f); } /***************************************************************//** diff --git a/sync/sync0sync.c b/sync/sync0sync.c index 225f28df78e..3a80da9318b 100644 --- a/sync/sync0sync.c +++ b/sync/sync0sync.c @@ -238,14 +238,14 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ - const char* cmutex_name, /*!< in: mutex name */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name) /*!< in: mutex name */ { #if defined(HAVE_ATOMIC_BUILTINS) mutex_reset_lock_word(mutex); @@ -1161,6 +1161,7 @@ sync_thread_add_level( case SYNC_LOG: case SYNC_THR_LOCAL: case SYNC_ANY_LATCH: + case SYNC_OUTER_ANY_LATCH: case SYNC_TRX_SYS_HEADER: case SYNC_FILE_FORMAT_TAG: case SYNC_DOUBLEWRITE: diff --git a/trx/trx0i_s.c b/trx/trx0i_s.c index c11ebb6397e..e148234888b 100644 --- a/trx/trx0i_s.c +++ b/trx/trx0i_s.c @@ -431,7 +431,7 @@ i_s_locks_row_validate( /* record lock */ ut_ad(!strcmp("RECORD", row->lock_type)); ut_ad(row->lock_index != NULL); - ut_ad(row->lock_data != NULL); + /* row->lock_data == NULL if buf_page_try_get() == NULL */ ut_ad(row->lock_page != ULINT_UNDEFINED); ut_ad(row->lock_rec != ULINT_UNDEFINED); } @@ -494,7 +494,6 @@ fill_trx_row( stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { @@ -508,6 +507,8 @@ fill_trx_row( cache->storage, stmt, stmt_len + 1, MAX_ALLOWED_FOR_STORAGE(cache)); + row->trx_query_cs = innobase_get_charset(trx->mysql_thd); + if (row->trx_query == NULL) { return(FALSE); diff --git a/trx/trx0rec.c b/trx/trx0rec.c index f50e10ed756..71629f01d73 100644 --- a/trx/trx0rec.c +++ b/trx/trx0rec.c @@ -665,14 +665,27 @@ trx_undo_page_report_modify( /* Save to the undo log the old values of the columns to be updated. */ if (update) { + ulint extended = 0; + if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - ptr += mach_write_compressed(ptr, upd_get_n_fields(update)); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + for (i = 0; i < upd_get_n_fields(update); i++) { + ulint pos = upd_get_nth_field(update, i)->field_no; - for (i = 0; i < upd_get_n_fields(update); i++) { + if (pos >= rec_offs_n_fields(offsets)) { + extended++; + } + } + } + + ptr += mach_write_compressed(ptr, upd_get_n_fields(update) - extended); + + for (i = 0; i < upd_get_n_fields(update) - extended; i++) { ulint pos = upd_get_nth_field(update, i)->field_no; diff --git a/trx/trx0roll.c b/trx/trx0roll.c index 1a43e419214..a4bbf7fd652 100644 --- a/trx/trx0roll.c +++ b/trx/trx0roll.c @@ -48,8 +48,8 @@ Created 3/26/1996 Heikki Tuuri rollback */ #define TRX_ROLL_TRUNC_THRESHOLD 1 -/** In crash recovery, the current trx to be rolled back */ -static trx_t* trx_roll_crash_recv_trx = NULL; +/** In crash recovery, the current trx to be rolled back; NULL otherwise */ +static const trx_t* trx_roll_crash_recv_trx = NULL; /** In crash recovery we set this to the undo n:o of the current trx to be rolled back. Then we can print how many % the rollback has progressed. */ diff --git a/trx/trx0trx.c b/trx/trx0trx.c index 75bbe1b342a..98bd9e4ac58 100644 --- a/trx/trx0trx.c +++ b/trx/trx0trx.c @@ -2106,18 +2106,18 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t* trx_get_trx_by_xid( /*===============*/ - XID* xid) /*!< in: X/Open XA transaction identification */ + const XID* xid) /*!< in: X/Open XA transaction identifier */ { trx_t* trx; if (xid == NULL) { - return (NULL); + return(NULL); } mutex_enter(&kernel_mutex); @@ -2130,10 +2130,16 @@ trx_get_trx_by_xid( of gtrid_lenght+bqual_length bytes should be the same */ - if (xid->gtrid_length == trx->xid.gtrid_length + if (trx->conc_state == TRX_PREPARED + && xid->gtrid_length == trx->xid.gtrid_length && xid->bqual_length == trx->xid.bqual_length && memcmp(xid->data, trx->xid.data, xid->gtrid_length + xid->bqual_length) == 0) { + + /* Invalidate the XID, so that subsequent calls + will not find it. */ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; break; } @@ -2142,14 +2148,5 @@ trx_get_trx_by_xid( mutex_exit(&kernel_mutex); - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } + return(trx); } diff --git a/ut/ut0dbg.c b/ut/ut0dbg.c index 4484e6c36de..7839466e16f 100644 --- a/ut/ut0dbg.c +++ b/ut/ut0dbg.c @@ -79,7 +79,7 @@ ut_dbg_assertion_failed( " or crashes, even\n" "InnoDB: immediately after the mysqld startup, there may be\n" "InnoDB: corruption in the InnoDB tablespace. Please refer to\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); #if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) ut_dbg_stop_threads = TRUE;