diff --git a/mysql-test/suite/innodb/r/innodb_bug30423.result b/mysql-test/suite/innodb/r/innodb_bug30423.result new file mode 100644 index 00000000000..d7b72b1ec2a --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug30423.result @@ -0,0 +1,95 @@ +set global innodb_stats_method = default; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_equal +select count(*) from bug30243_3 where org_id is not NULL; +count(*) +20 +select count(*) from bug30243_3 where org_id is NULL; +count(*) +16384 +select count(*) from bug30243_2 where org_id is not NULL; +count(*) +224 +select count(*) from bug30243_2 where org_id is NULL; +count(*) +65536 +select @@innodb_stats_method; +@@innodb_stats_method +nulls_equal +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +set global innodb_stats_method = "NULL"; +ERROR 42000: Variable 'innodb_stats_method' can't be set to the value of 'NULL' +set global innodb_stats_method = "nulls_ignored"; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_ignored +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +explain SELECT COUNT(*), 0 +FROM bug30243_1 orgs +LEFT JOIN bug30243_3 sa_opportunities +ON orgs.org_id=sa_opportunities.org_id +LEFT JOIN bug30243_2 contacts +ON orgs.org_id=contacts.org_id ; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +select @@innodb_stats_method; +@@innodb_stats_method +nulls_ignored +set global innodb_stats_method = "nulls_unequal"; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_unequal +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +explain SELECT COUNT(*), 0 +FROM bug30243_1 orgs +LEFT JOIN bug30243_3 sa_opportunities +ON orgs.org_id=sa_opportunities.org_id +LEFT JOIN bug30243_2 contacts +ON orgs.org_id=contacts.org_id; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; +COUNT(*) +1024 +set global innodb_stats_method = "nulls_unequal"; +analyze table table_bug30423; +Table Op Msg_type Msg_text +test.table_bug30423 analyze status OK +set global innodb_stats_method = "nulls_ignored"; +analyze table table_bug30423; +Table Op Msg_type Msg_text +test.table_bug30423 analyze status OK +set global innodb_stats_method = nulls_equal; +drop table bug30243_2; +drop table bug30243_1; +drop table bug30243_3; +drop table table_bug30423; diff --git a/mysql-test/suite/innodb/t/innodb_bug30423.test b/mysql-test/suite/innodb/t/innodb_bug30423.test new file mode 100644 index 00000000000..f2a3ee8d099 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug30423.test @@ -0,0 +1,211 @@ +# Test for Bug #30423, InnoDBs treatment of NULL in index stats causes +# bad "rows examined" estimates. +# Implemented InnoDB system variable "innodb_stats_method" with +# "nulls_equal" (default), "nulls_unequal", and "nulls_ignored" options. + +-- source include/have_innodb.inc + +let $innodb_stats_method_orig = `select @@innodb_stats_method`; + +# default setting for innodb_stats_method is "nulls_equal" +set global innodb_stats_method = default; + +select @@innodb_stats_method; + +# create three tables, bug30243_1, bug30243_2 and bug30243_3. +# The test scenario is adopted from original bug #30423 report. +# table bug30243_1 and bug30243_3 have many NULL values + +-- disable_result_log +-- disable_query_log + +DROP TABLE IF EXISTS bug30243_1; +CREATE TABLE bug30243_1 ( + org_id int(11) NOT NULL default '0', + UNIQUE KEY (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +LOCK TABLES bug30243_1 WRITE; +INSERT INTO bug30243_1 VALUES (11),(15),(16),(17),(19),(20),(21),(23),(24), +(25),(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(37),(38),(40),(41), +(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),(56), +(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70),(71), +(72),(73),(74),(75),(76),(77),(78),(79),(80),(81),(82),(83),(84),(85),(86), +(87),(88),(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101), +(102),(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114), +(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),(127), +(128),(129),(130),(131),(132),(133),(134),(135),(136),(137),(138),(139),(140), +(141),(142),(143),(144),(145); +UNLOCK TABLES; + +DROP TABLE IF EXISTS bug30243_3; +CREATE TABLE bug30243_3 ( + org_id int(11) default NULL, + KEY (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO bug30243_3 VALUES (NULL); + +begin; +let $i=14; +while ($i) +{ + INSERT INTO bug30243_3 SELECT NULL FROM bug30243_3; + dec $i; +} + +INSERT INTO bug30243_3 VALUES (34),(34),(35),(56),(58),(62),(62),(64),(65),(66),(80),(135),(137),(138),(139),(140),(142),(143),(144),(145); +commit; + +DROP TABLE IF EXISTS bug30243_2; +CREATE TABLE bug30243_2 ( + org_id int(11) default NULL, + KEY `contacts$org_id` (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO bug30243_2 VALUES (NULL); + +begin; +let $i=16; +while ($i) +{ + INSERT INTO bug30243_2 SELECT NULL FROM bug30243_2; + dec $i; +} + +INSERT INTO bug30243_2 VALUES (11),(15),(16),(17),(20),(21),(23),(24),(25), +(26),(27),(28),(29),(30),(31),(32),(33),(34),(37),(38),(40),(41),(42),(43), +(44),(45),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(48), +(48),(50),(51),(52),(52),(53),(54),(55),(57),(60),(61),(62),(62),(62),(62), +(62),(63),(64),(64),(65),(66),(66),(67),(68),(69),(70),(71),(72),(73),(74), +(75),(76),(77),(78),(79),(80),(80),(81),(82),(83),(84),(85),(86),(87),(88), +(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),(102), +(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114), +(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126), +(127),(128),(129),(130),(131),(132),(133),(133),(135),(135),(135),(135), +(136),(136),(138),(138),(139),(139),(139),(140),(141),(141),(142),(143), +(143),(145),(145); +commit; + + +-- enable_result_log +-- enable_query_log + +# check tables's value +select count(*) from bug30243_3 where org_id is not NULL; +select count(*) from bug30243_3 where org_id is NULL; + +select count(*) from bug30243_2 where org_id is not NULL; +select count(*) from bug30243_2 where org_id is NULL; + +select @@innodb_stats_method; + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we over estimate the rows per +# unique value (since there are many NULLs). +# Skip this query log since the stats estimate could vary from runs +-- disable_query_log +-- disable_result_log +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id ; +-- enable_query_log +-- enable_result_log + +# following set operation will fail +#--error ER_WRONG_VALUE_FOR_VAR +--error 1231 +set global innodb_stats_method = "NULL"; + +set global innodb_stats_method = "nulls_ignored"; + +select @@innodb_stats_method; + +# Regenerate the stats with "nulls_ignored" option + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we get the correct rows per +# unique value (should be approximately 1 row per value) +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id ; + +select @@innodb_stats_method; + +# Try the "nulls_unequal" option +set global innodb_stats_method = "nulls_unequal"; + +select @@innodb_stats_method; + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we get the correct rows per +# unique value (~1) +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id; + + +# Create a table with all NULL values, make sure the stats calculation +# does not crash with table of all NULL values +-- disable_query_log +CREATE TABLE table_bug30423 ( + org_id int(11) default NULL, + KEY(org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO `table_bug30423` VALUES (NULL); + +begin; +let $i=10; +while ($i) +{ + INSERT INTO table_bug30423 SELECT NULL FROM table_bug30423; + dec $i; +} +commit; + +-- enable_query_log + +SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; + +# calculate the statistics for the table for "nulls_ignored" and +# "nulls_unequal" option +set global innodb_stats_method = "nulls_unequal"; +analyze table table_bug30423; + +set global innodb_stats_method = "nulls_ignored"; +analyze table table_bug30423; + + +eval set global innodb_stats_method = $innodb_stats_method_orig; + +drop table bug30243_2; + +drop table bug30243_1; + +drop table bug30243_3; + +drop table table_bug30423; diff --git a/mysql-test/suite/innodb/t/innodb_bug56143.test b/mysql-test/suite/innodb/t/innodb_bug56143.test index 3b46f7e1621..c21de09892c 100644 --- a/mysql-test/suite/innodb/t/innodb_bug56143.test +++ b/mysql-test/suite/innodb/t/innodb_bug56143.test @@ -8,6 +8,11 @@ -- disable_query_log -- disable_result_log +if ($VALGRIND_TEST) +{ + call mtr.add_suppression("InnoDB: Warning: a long semaphore wait:"); +} + SET foreign_key_checks=0; DROP TABLE IF EXISTS bug56143_1; diff --git a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result index e635f1201a3..418330f078d 100644 --- a/mysql-test/suite/sys_vars/r/all_vars.result +++ b/mysql-test/suite/sys_vars/r/all_vars.result @@ -11,7 +11,9 @@ There should be *no* long test name listed below: select variable_name as `There should be *no* variables listed below:` from t2 left join t1 on variable_name=test_name where test_name is null; There should be *no* variables listed below: +INNODB_STATS_METHOD INNODB_FILE_FORMAT_MAX +INNODB_STATS_METHOD INNODB_FILE_FORMAT_MAX drop table t1; drop table t2; diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index ff82372b52b..2e544c0552a 100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -981,7 +981,7 @@ btr_page_reorganize_low( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(buf_pool, 0); + temp_block = buf_block_alloc(buf_pool); #else /* !UNIV_HOTBACKUP */ ut_ad(block == back_block1); temp_block = back_block2; diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index a5856479147..8218fc81c13 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -111,6 +111,18 @@ can be released by page reorganize, then it is reorganized */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ + +/** Estimated table level stats from sampled value. +@param value sampled stats +@param index index being sampled +@param sample number of sampled rows +@param ext_size external stored data size +@param not_empty table not empty +@return estimated table wide stats from sampled value */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ + (((value) * (ib_int64_t) index->stat_n_leaf_pages \ + + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) + /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -3478,10 +3490,55 @@ btr_estimate_n_rows_in_range( } } +/*******************************************************************//** +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /*!< in: physical record */ + ulint n_unique, /*!< in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const ulint* offsets, /*!< in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_int64_t* n_not_null) /*!< in/out: array to record number of + not null rows for n-column prefix */ +{ + ulint i; + + ut_ad(rec_offs_n_fields(offsets) >= n_unique); + + if (n_not_null == NULL) { + return; + } + + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; + + field = rec_get_nth_field(rec, offsets, i, &rec_len); + + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; + } + } +} + /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( @@ -3495,6 +3552,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_int64_t* n_diff; + ib_int64_t* n_not_null; + ibool stats_null_not_equal; ullint n_sample_pages; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; @@ -3503,16 +3562,43 @@ btr_estimate_number_of_different_key_vals( ullint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - rec_offs_init(offsets_rec_); - rec_offs_init(offsets_next_rec_); + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t)); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); + + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; + + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; + + default: + ut_error; + } /* It makes no sense to test more pages than are contained in the index, thus we lower the number if it is too high */ @@ -3529,7 +3615,6 @@ btr_estimate_number_of_different_key_vals( /* We sample some pages in the index to get an estimate */ for (i = 0; i < n_sample_pages; i++) { - rec_t* supremum; mtr_start(&mtr); btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); @@ -3542,18 +3627,25 @@ btr_estimate_number_of_different_key_vals( page = btr_cur_get_page(&cursor); - supremum = page_get_supremum_rec(page); rec = page_rec_get_next(page_get_infimum_rec(page)); - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { + while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + if (page_rec_is_supremum(next_rec)) { + total_external_size += + btr_rec_get_externally_stored_len( + rec, offsets_rec); break; } @@ -3561,11 +3653,13 @@ btr_estimate_number_of_different_key_vals( matched_bytes = 0; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, - n_cols, &heap); + ULINT_UNDEFINED, + &heap); cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, + index, stats_null_not_equal, + &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -3575,6 +3669,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -3609,10 +3709,6 @@ btr_estimate_number_of_different_key_vals( } } - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - total_external_size += btr_rec_get_externally_stored_len( - rec, offsets_rec); mtr_commit(&mtr); } @@ -3626,13 +3722,9 @@ btr_estimate_number_of_different_key_vals( for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_int64_t)index->stat_n_leaf_pages - + n_sample_pages - 1 - + total_external_size - + not_empty_flag) - / (n_sample_pages - + total_external_size)); + = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, n_sample_pages, + total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * n_sample_pages + total_external_size, then @@ -3651,12 +3743,20 @@ btr_estimate_number_of_different_key_vals( } index->stat_n_diff_key_vals[j] += add_on; + + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, n_sample_pages, + total_external_size, not_empty_flag); + } } - mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ @@ -3946,13 +4046,12 @@ btr_blob_free( && buf_block_get_space(block) == space && buf_block_get_page_no(block) == page_no) { - if (buf_LRU_free_block(&block->page, all, NULL) - != BUF_LRU_FREED + if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED && all && block->page.zip.data) { /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. */ - buf_LRU_free_block(&block->page, FALSE, NULL); + buf_LRU_free_block(&block->page, FALSE); } } diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c index 8f5f9b839b5..d192ee7c132 100644 --- a/storage/innobase/btr/btr0sea.c +++ b/storage/innobase/btr/btr0sea.c @@ -151,7 +151,7 @@ btr_search_check_free_space_in_heap(void) be enough free space in the hash table. */ if (heap->free_block == NULL) { - buf_block_t* block = buf_block_alloc(NULL, 0); + buf_block_t* block = buf_block_alloc(NULL); rw_lock_x_lock(&btr_search_latch); diff --git a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c index 787b4e652a0..75ba832c7e5 100644 --- a/storage/innobase/buf/buf0buddy.c +++ b/storage/innobase/buf/buf0buddy.c @@ -327,7 +327,7 @@ buf_buddy_alloc_low( /* Try replacing an uncompressed page in the buffer pool. */ buf_pool_mutex_exit(buf_pool); - block = buf_LRU_get_free_block(buf_pool, 0); + block = buf_LRU_get_free_block(buf_pool); *lru = TRUE; buf_pool_mutex_enter(buf_pool); diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index 3ff69d8fd1b..e585b15fc21 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -416,9 +416,9 @@ UNIV_INTERN buf_block_t* buf_block_alloc( /*============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ + buf_pool_t* buf_pool) /*!< in/out: buffer pool instance, + or NULL for round-robin selection + of the buffer pool */ { buf_block_t* block; ulint index; @@ -431,7 +431,7 @@ buf_block_alloc( buf_pool = buf_pool_from_array(index); } - block = buf_LRU_get_free_block(buf_pool, zip_size); + block = buf_LRU_get_free_block(buf_pool); buf_block_set_state(block, BUF_BLOCK_MEMORY); @@ -1623,7 +1623,7 @@ shrink_again: buf_LRU_make_block_old(&block->page); dirty++; - } else if (buf_LRU_free_block(&block->page, TRUE, NULL) + } else if (buf_LRU_free_block(&block->page, TRUE) != BUF_LRU_FREED) { nonfree++; } @@ -2366,8 +2366,7 @@ err_exit: mutex_enter(block_mutex); /* Discard the uncompressed page frame if possible. */ - if (buf_LRU_free_block(bpage, FALSE, NULL) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) { mutex_exit(block_mutex); goto lookup; @@ -2883,7 +2882,7 @@ wait_until_unfixed: buf_pool_mutex_exit(buf_pool); mutex_exit(&buf_pool->zip_mutex); - block = buf_LRU_get_free_block(buf_pool, 0); + block = buf_LRU_get_free_block(buf_pool); ut_a(block); buf_pool_mutex_enter(buf_pool); @@ -3013,8 +3012,7 @@ wait_until_unfixed: /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ - if (buf_LRU_free_block(&block->page, TRUE, NULL) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) { mutex_exit(&block->mutex); if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { /* Set the watch, as it would have @@ -3588,7 +3586,7 @@ buf_page_init_for_read( && UNIV_LIKELY(!recv_recovery_is_on())) { block = NULL; } else { - block = buf_LRU_get_free_block(buf_pool, 0); + block = buf_LRU_get_free_block(buf_pool); ut_ad(block); ut_ad(buf_pool_from_block(block) == buf_pool); } @@ -3794,7 +3792,7 @@ buf_page_create( ut_ad(mtr->state == MTR_ACTIVE); ut_ad(space || !zip_size); - free_block = buf_LRU_get_free_block(buf_pool, 0); + free_block = buf_LRU_get_free_block(buf_pool); fold = buf_page_address_fold(space, offset); diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c index d819782ac9d..910e8386a71 100644 --- a/storage/innobase/buf/buf0lru.c +++ b/storage/innobase/buf/buf0lru.c @@ -603,7 +603,7 @@ buf_LRU_free_from_unzip_LRU_list( ut_ad(block->page.in_LRU_list); mutex_enter(&block->mutex); - freed = buf_LRU_free_block(&block->page, FALSE, NULL); + freed = buf_LRU_free_block(&block->page, FALSE); mutex_exit(&block->mutex); switch (freed) { @@ -666,7 +666,7 @@ buf_LRU_free_from_common_LRU_list( mutex_enter(block_mutex); accessed = buf_page_is_accessed(bpage); - freed = buf_LRU_free_block(bpage, TRUE, NULL); + freed = buf_LRU_free_block(bpage, TRUE); mutex_exit(block_mutex); switch (freed) { @@ -858,9 +858,7 @@ UNIV_INTERN buf_block_t* buf_LRU_get_free_block( /*===================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ + buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ { buf_block_t* block = NULL; ibool freed; @@ -936,31 +934,11 @@ loop: /* If there is a block in the free list, take it */ block = buf_LRU_get_free_only(buf_pool); + buf_pool_mutex_exit(buf_pool); + if (block) { - ut_ad(buf_pool_from_block(block) == buf_pool); - -#ifdef UNIV_DEBUG - block->page.zip.m_start = -#endif /* UNIV_DEBUG */ - block->page.zip.m_end = - block->page.zip.m_nonempty = - block->page.zip.n_blobs = 0; - - if (UNIV_UNLIKELY(zip_size)) { - ibool lru; - page_zip_set_size(&block->page.zip, zip_size); - - block->page.zip.data = buf_buddy_alloc( - buf_pool, zip_size, &lru); - - UNIV_MEM_DESC(block->page.zip.data, zip_size, block); - } else { - page_zip_set_size(&block->page.zip, 0); - block->page.zip.data = NULL; - } - - buf_pool_mutex_exit(buf_pool); + memset(&block->page.zip, 0, sizeof block->page.zip); if (started_monitor) { srv_print_innodb_monitor = mon_value_was; @@ -972,8 +950,6 @@ loop: /* If no block was in the free list, search from the end of the LRU list and try to free a block there */ - buf_pool_mutex_exit(buf_pool); - freed = buf_LRU_search_and_free_block(buf_pool, n_iterations); if (freed > 0) { @@ -1456,12 +1432,8 @@ enum buf_lru_free_block_status buf_LRU_free_block( /*===============*/ buf_page_t* bpage, /*!< in: block to be freed */ - ibool zip, /*!< in: TRUE if should remove also the + ibool zip) /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released) - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ { buf_page_t* b = NULL; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); @@ -1638,10 +1610,6 @@ alloc: b->io_fix = BUF_IO_READ; } - if (buf_pool_mutex_released) { - *buf_pool_mutex_released = TRUE; - } - buf_pool_mutex_exit(buf_pool); mutex_exit(block_mutex); diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index ee862d8c709..f98d3dbc639 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -1689,6 +1689,12 @@ undo_size_ok: new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_int64_t)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... in case we do not calculate statistics quickly enough */ @@ -4319,6 +4325,10 @@ dict_update_statistics( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 191dd53d520..8565809c09f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -172,6 +172,25 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +/** Possible values for system variable "innodb_stats_method". The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/** Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -7711,6 +7730,65 @@ innobase_get_mysql_key_number_for_index( return(0); } + +/*********************************************************************//** +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" +@return estimated record per key value */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + dict_index_t* index, /*!< in: dict_index_t structure */ + ulint i, /*!< in: the column we are + calculating rec per key */ + ha_rows records) /*!< in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_int64_t num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is statistics value, the value could + have slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + large than that of the distinct values, we could + consider that the table consists mostly of NULL value. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ @@ -7944,13 +8022,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -11173,6 +11246,13 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, innodb_change_buffering_validate, innodb_change_buffering_update, "all"); +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -11229,6 +11309,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_on_metadata), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(replication_delay), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(strict_mode), diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index 0cec0318bf4..e1f61d48a76 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -2585,23 +2585,6 @@ ibuf_contract_ext( if (UNIV_UNLIKELY(ibuf->empty) && UNIV_LIKELY(!srv_shutdown_state)) { -ibuf_is_empty: - -#if 0 /* TODO */ - if (srv_shutdown_state) { - /* If the insert buffer becomes empty during - shutdown, note it in the system tablespace. */ - - trx_sys_set_ibuf_format(TRX_SYS_IBUF_EMPTY); - } - - /* TO DO: call trx_sys_set_ibuf_format() at startup - and whenever ibuf_use is changed to allow buffered - delete-marking or deleting. Never downgrade the - stamped format except when the insert buffer becomes - empty. */ -#endif - return(0); } @@ -2631,7 +2614,7 @@ ibuf_is_empty: mtr_commit(&mtr); btr_pcur_close(&pcur); - goto ibuf_is_empty; + return(0); } sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index cb65c6bfab7..a9434e2d183 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -466,7 +466,10 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 46cae25f0f5..7d57cc26288 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -276,9 +276,9 @@ UNIV_INTERN buf_block_t* buf_block_alloc( /*============*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ + buf_pool_t* buf_pool); /*!< in: buffer pool instance, + or NULL for round-robin selection + of the buffer pool */ /********************************************************************//** Frees a buffer block which does not contain a file page. */ UNIV_INLINE diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index 7bcec633d9c..9b150188b03 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -111,12 +111,9 @@ enum buf_lru_free_block_status buf_LRU_free_block( /*===============*/ buf_page_t* bpage, /*!< in: block to be freed */ - ibool zip, /*!< in: TRUE if should remove also the + ibool zip) /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released); - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool->mutex - was temporarily released, or NULL */ + __attribute__((nonnull)); /******************************************************************//** Try to free a replaceable block. @return TRUE if found and freed */ @@ -153,9 +150,8 @@ UNIV_INTERN buf_block_t* buf_LRU_get_free_block( /*===================*/ - buf_pool_t* buf_pool, /*!< in: preferred buffer pool */ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ + buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ + __attribute__((nonnull,warn_unused_result)); /******************************************************************//** Puts a block back to the free list. */ diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 7526e4dc8dc..5757b79d3ed 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -376,6 +376,12 @@ struct dict_index_struct{ dict_get_n_unique(index); we periodically calculate new estimates */ + ib_int64_t* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /*!< approximate index size in database pages */ diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index 0a9edfbfe70..687209575c9 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -33,11 +33,6 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_foreign_struct dict_foreign_t; -/* A cluster object is a table object with the type field set to -DICT_CLUSTERED */ - -typedef dict_table_t dict_cluster_t; - typedef struct ind_node_struct ind_node_t; typedef struct tab_node_struct tab_node_t; diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h index 2f751a38864..a908521c9f7 100644 --- a/storage/innobase/include/rem0cmp.h +++ b/storage/innobase/include/rem0cmp.h @@ -165,6 +165,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic index 39ef5f4fba3..63415fe7837 100644 --- a/storage/innobase/include/rem0cmp.ic +++ b/storage/innobase/include/rem0cmp.ic @@ -87,5 +87,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b)); + FALSE, &match_f, &match_b)); } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 612ff4c6e54..d600ad4a034 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -172,6 +172,11 @@ capacity. PCT_IO(5) -> returns the number of IO operations that is 5% of the max where max is srv_io_capacity. */ #define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0))) +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -418,6 +423,19 @@ enum { in connection with recovery */ }; +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + #ifndef UNIV_HOTBACKUP /** Types of threads existing in the system. */ enum srv_thread_type { diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index 5988fdfb382..9e5c96fa323 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -555,6 +555,7 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info); /*!< in: debug struct */ #endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 78a7a8c4bb0..a293d9b896e 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -141,9 +141,7 @@ struct trx_rseg_struct{ ulint id; /*!< rollback segment id == the index of its slot in the trx system file copy */ mutex_t mutex; /*!< mutex protecting the fields in this - struct except id; NOTE that the latching - order must always be kernel mutex -> - rseg mutex */ + struct except id, which is constant */ ulint space; /*!< space where the rollback segment is header is placed */ ulint zip_size;/* compressed page size of space diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index b4a4aa0ca44..83f6182c347 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -214,12 +214,12 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t * trx_get_trx_by_xid( /*===============*/ - XID* xid); /*!< in: X/Open XA transaction identification */ + const XID* xid); /*!< in: X/Open XA transaction identifier */ /**********************************************************************//** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 4470d221da9..6d69082e5b5 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -429,7 +429,7 @@ it is read or written. */ /* Use sun_prefetch when compile with Sun Studio */ # define UNIV_EXPECT(expr,value) (expr) # define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr) +# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) # define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) #else /* Dummy versions of the macros */ diff --git a/storage/innobase/mem/mem0mem.c b/storage/innobase/mem/mem0mem.c index b5a7ddbd7b2..7727760f1cd 100644 --- a/storage/innobase/mem/mem0mem.c +++ b/storage/innobase/mem/mem0mem.c @@ -347,7 +347,7 @@ mem_heap_create_block( return(NULL); } } else { - buf_block = buf_block_alloc(NULL, 0); + buf_block = buf_block_alloc(NULL); } block = (mem_block_t*) buf_block->frame; diff --git a/storage/innobase/mtr/mtr0log.c b/storage/innobase/mtr/mtr0log.c index 04eeb4391cd..864970cef40 100644 --- a/storage/innobase/mtr/mtr0log.c +++ b/storage/innobase/mtr/mtr0log.c @@ -408,7 +408,7 @@ mlog_parse_string( ptr += 2; if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(len + offset) > UNIV_PAGE_SIZE) { + || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) { recv_sys->found_corrupt_log = TRUE; return(NULL); diff --git a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c index 98640a8e6fb..4c5371370da 100644 --- a/storage/innobase/page/page0zip.c +++ b/storage/innobase/page/page0zip.c @@ -4440,7 +4440,7 @@ page_zip_reorganize( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(buf_pool, 0); + temp_block = buf_block_alloc(buf_pool); btr_search_drop_page_hash_index(block); block->check_index_page_at_flush = TRUE; #else /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c index 35b67992558..04d2c15437b 100644 --- a/storage/innobase/rem/rem0cmp.c +++ b/storage/innobase/rem/rem0cmp.c @@ -862,6 +862,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -961,9 +965,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - goto next_field; - + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { + ret = -1; + } else { + goto next_field; + } } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c index 0c68d6477a4..9827141caec 100644 --- a/storage/innobase/row/row0merge.c +++ b/storage/innobase/row/row0merge.c @@ -2158,13 +2158,15 @@ row_merge_drop_temp_indexes(void) } /*********************************************************************//** -Create a merge file. */ -static -void -row_merge_file_create( -/*==================*/ - merge_file_t* merge_file) /*!< out: merge file structure */ +Creates temperary merge files, and if UNIV_PFS_IO defined, register +the file descriptor with Performance Schema. +@return File descriptor */ +UNIV_INLINE +int +row_merge_file_create_low(void) +/*===========================*/ { + int fd; #ifdef UNIV_PFS_IO /* This temp file open does not go through normal file APIs, add instrumentation to register with @@ -2176,14 +2178,46 @@ row_merge_file_create( "Innodb Merge Temp File", __FILE__, __LINE__); #endif - merge_file->fd = innobase_mysql_tmpfile(); + fd = innobase_mysql_tmpfile(); +#ifdef UNIV_PFS_IO + register_pfs_file_open_end(locker, fd); +#endif + return(fd); +} +/*********************************************************************//** +Create a merge file. */ +static +void +row_merge_file_create( +/*==================*/ + merge_file_t* merge_file) /*!< out: merge file structure */ +{ + merge_file->fd = row_merge_file_create_low(); merge_file->offset = 0; merge_file->n_rec = 0; -#ifdef UNIV_PFS_IO - register_pfs_file_open_end(locker, merge_file->fd); -#endif } +/*********************************************************************//** +Destroy a merge file. And de-register the file from Performance Schema +if UNIV_PFS_IO is defined. */ +UNIV_INLINE +void +row_merge_file_destroy_low( +/*=======================*/ + int fd) /*!< in: merge file descriptor */ +{ +#ifdef UNIV_PFS_IO + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + register_pfs_file_io_begin(&state, locker, + fd, 0, PSI_FILE_CLOSE, + __FILE__, __LINE__); +#endif + close(fd); +#ifdef UNIV_PFS_IO + register_pfs_file_io_end(locker, 0); +#endif +} /*********************************************************************//** Destroy a merge file. */ static @@ -2192,20 +2226,10 @@ row_merge_file_destroy( /*===================*/ merge_file_t* merge_file) /*!< out: merge file structure */ { -#ifdef UNIV_PFS_IO - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - register_pfs_file_io_begin(&state, locker, merge_file->fd, 0, PSI_FILE_CLOSE, - __FILE__, __LINE__); -#endif if (merge_file->fd != -1) { - close(merge_file->fd); + row_merge_file_destroy_low(merge_file->fd); merge_file->fd = -1; } - -#ifdef UNIV_PFS_IO - register_pfs_file_io_end(locker, 0); -#endif } /*********************************************************************//** @@ -2600,7 +2624,7 @@ row_merge_build_indexes( row_merge_file_create(&merge_files[i]); } - tmpfd = innobase_mysql_tmpfile(); + tmpfd = row_merge_file_create_low(); /* Reset the MySQL row buffer that is used when reporting duplicate keys. */ @@ -2642,7 +2666,7 @@ row_merge_build_indexes( } func_exit: - close(tmpfd); + row_merge_file_destroy_low(tmpfd); for (i = 0; i < n_indexes; i++) { row_merge_file_destroy(&merge_files[i]); diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index 03ed40c6647..b552d879e1c 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -1252,6 +1252,10 @@ row_upd_changes_ord_field_binary( || dfield_is_null(dfield)) { /* do nothing special */ } else if (UNIV_LIKELY_NULL(ext)) { + /* Silence a compiler warning without + silencing a Valgrind error. */ + dfield_len = 0; + UNIV_MEM_INVALID(&dfield_len, sizeof dfield_len); /* See if the column is stored externally. */ buf = row_ext_lookup(ext, col_no, &dfield_len); diff --git a/storage/innobase/row/row0vers.c b/storage/innobase/row/row0vers.c index 2e12361312c..91c230db135 100644 --- a/storage/innobase/row/row0vers.c +++ b/storage/innobase/row/row0vers.c @@ -669,11 +669,15 @@ row_vers_build_for_semi_consistent_read( mutex_enter(&kernel_mutex); version_trx = trx_get_on_id(version_trx_id); + if (version_trx + && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY + || version_trx->conc_state == TRX_NOT_STARTED)) { + + version_trx = NULL; + } mutex_exit(&kernel_mutex); - if (!version_trx - || version_trx->conc_state == TRX_NOT_STARTED - || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) { + if (!version_trx) { /* We found a version that belongs to a committed transaction: return it. */ diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 1c57d6960d0..826145005a4 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -275,6 +275,11 @@ UNIV_INTERN ulong srv_purge_batch_size = 20; /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ UNIV_INTERN ulint srv_data_written = 0; diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c index ce9d42c4921..3ecbce72ebd 100644 --- a/storage/innobase/sync/sync0arr.c +++ b/storage/innobase/sync/sync0arr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -714,7 +714,7 @@ print: fprintf(stderr, "rw-lock %p ", (void*) lock); sync_array_cell_print(stderr, cell); - rw_lock_debug_print(debug); + rw_lock_debug_print(stderr, debug); return(TRUE); } } diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c index 783cff3bf82..295a010f9d7 100644 --- a/storage/innobase/sync/sync0rw.c +++ b/storage/innobase/sync/sync0rw.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -936,7 +936,7 @@ rw_lock_list_print_info( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(file, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -984,7 +984,7 @@ rw_lock_print( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(stderr, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -996,28 +996,29 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info) /*!< in: debug struct */ { ulint rwt; rwt = info->lock_type; - fprintf(stderr, "Locked: thread %lu file %s line %lu ", + fprintf(f, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", stderr); + fputs("S-LOCK", f); } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", stderr); + fputs("X-LOCK", f); } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", stderr); + fputs("WAIT X-LOCK", f); } else { ut_error; } if (info->pass != 0) { - fprintf(stderr, " pass value %lu", (ulong) info->pass); + fprintf(f, " pass value %lu", (ulong) info->pass); } - putc('\n', stderr); + putc('\n', f); } /***************************************************************//** diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c index 761cc4c805e..453314f465d 100644 --- a/storage/innobase/sync/sync0sync.c +++ b/storage/innobase/sync/sync0sync.c @@ -222,21 +222,40 @@ UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key; UNIV_INTERN ibool sync_order_checks_on = FALSE; #endif /* UNIV_SYNC_DEBUG */ -/** Mutexes or rw-locks held by a thread */ -struct sync_thread_struct{ - os_thread_id_t id; /*!< OS thread id */ - sync_level_t* levels; /*!< level array for this thread; if - this is NULL this slot is unused */ +/** Number of slots reserved for each OS thread in the sync level array */ +static const ulint SYNC_THREAD_N_LEVELS = 10000; + +typedef struct sync_arr_struct sync_arr_t; + +/** Array for tracking sync levels per thread. */ +struct sync_arr_struct { + ulint in_use; /*!< Number of active cells */ + ulint n_elems; /*!< Number of elements in the array */ + ulint max_elems; /*!< Maximum elements */ + ulint next_free; /*!< ULINT_UNDEFINED or index of next + free slot */ + sync_level_t* elems; /*!< Array elements */ }; -/** Number of slots reserved for each OS thread in the sync level array */ -#define SYNC_THREAD_N_LEVELS 10000 +/** Mutexes or rw-locks held by a thread */ +struct sync_thread_struct{ + os_thread_id_t id; /*!< OS thread id */ + sync_arr_t* levels; /*!< level array for this thread; if + this is NULL this slot is unused */ +}; /** An acquired mutex or rw-lock and its level in the latching order */ struct sync_level_struct{ - void* latch; /*!< pointer to a mutex or an rw-lock; NULL means that - the slot is empty */ - ulint level; /*!< level of the latch in the latching order */ + void* latch; /*!< pointer to a mutex or an + rw-lock; NULL means that + the slot is empty */ + ulint level; /*!< level of the latch in the + latching order. This field is + overloaded to serve as a node in a + linked list of free nodes too. When + latch == NULL then this will contain + the ordinal value of the next free + element */ }; /******************************************************************//** @@ -745,27 +764,28 @@ mutex_n_reserved(void) /*==================*/ { mutex_t* mutex; - ulint count = 0; + ulint count = 0; mutex_enter(&mutex_list_mutex); - mutex = UT_LIST_GET_FIRST(mutex_list); + for (mutex = UT_LIST_GET_FIRST(mutex_list); + mutex != NULL; + mutex = UT_LIST_GET_NEXT(list, mutex)) { - while (mutex != NULL) { if (mutex_get_lock_word(mutex) != 0) { count++; } - - mutex = UT_LIST_GET_NEXT(list, mutex); } mutex_exit(&mutex_list_mutex); ut_a(count >= 1); - return(count - 1); /* Subtract one, because this function itself - was holding one mutex (mutex_list_mutex) */ + /* Subtract one, because this function itself was holding + one mutex (mutex_list_mutex) */ + + return(count - 1); } /******************************************************************//** @@ -780,20 +800,6 @@ sync_all_freed(void) return(mutex_n_reserved() + rw_lock_n_locked() == 0); } -/******************************************************************//** -Gets the value in the nth slot in the thread level arrays. -@return pointer to thread slot */ -static -sync_thread_t* -sync_thread_level_arrays_get_nth( -/*=============================*/ - ulint n) /*!< in: slot number */ -{ - ut_ad(n < OS_THREAD_MAX_N); - - return(sync_thread_level_arrays + n); -} - /******************************************************************//** Looks for the thread slot for the calling thread. @return pointer to thread slot, NULL if not found */ @@ -803,15 +809,15 @@ sync_thread_level_arrays_find_slot(void) /*====================================*/ { - sync_thread_t* slot; - os_thread_id_t id; ulint i; + os_thread_id_t id; id = os_thread_get_curr_id(); for (i = 0; i < OS_THREAD_MAX_N; i++) { + sync_thread_t* slot; - slot = sync_thread_level_arrays_get_nth(i); + slot = &sync_thread_level_arrays[i]; if (slot->levels && os_thread_eq(slot->id, id)) { @@ -831,12 +837,12 @@ sync_thread_level_arrays_find_free(void) /*====================================*/ { - sync_thread_t* slot; ulint i; for (i = 0; i < OS_THREAD_MAX_N; i++) { + sync_thread_t* slot; - slot = sync_thread_level_arrays_get_nth(i); + slot = &sync_thread_level_arrays[i]; if (slot->levels == NULL) { @@ -848,19 +854,44 @@ sync_thread_level_arrays_find_free(void) } /******************************************************************//** -Gets the value in the nth slot in the thread level array. -@return pointer to level slot */ +Print warning. */ static -sync_level_t* -sync_thread_levels_get_nth( -/*=======================*/ - sync_level_t* arr, /*!< in: pointer to level array for an OS - thread */ - ulint n) /*!< in: slot number */ +void +sync_print_warning( +/*===============*/ + const sync_level_t* slot) /*!< in: slot for which to + print warning */ { - ut_ad(n < SYNC_THREAD_N_LEVELS); + mutex_t* mutex; - return(arr + n); + mutex = slot->latch; + + if (mutex->magic_n == MUTEX_MAGIC_N) { + fprintf(stderr, + "Mutex created at %s %lu\n", + mutex->cfile_name, (ulong) mutex->cline); + + if (mutex_get_lock_word(mutex) != 0) { + ulint line; + const char* file_name; + os_thread_id_t thread_id; + + mutex_get_debug_info( + mutex, &file_name, &line, &thread_id); + + fprintf(stderr, + "InnoDB: Locked mutex:" + " addr %p thread %ld file %s line %ld\n", + (void*) mutex, os_thread_pf(thread_id), + file_name, (ulong) line); + } else { + fputs("Not locked\n", stderr); + } + } else { + rw_lock_t* lock = slot->latch; + + rw_lock_print(lock); + } } /******************************************************************//** @@ -871,69 +902,29 @@ static ibool sync_thread_levels_g( /*=================*/ - sync_level_t* arr, /*!< in: pointer to level array for an OS + sync_arr_t* arr, /*!< in: pointer to level array for an OS thread */ ulint limit, /*!< in: level limit */ ulint warn) /*!< in: TRUE=display a diagnostic message */ { - sync_level_t* slot; - rw_lock_t* lock; - mutex_t* mutex; ulint i; - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + for (i = 0; i < arr->n_elems; i++) { + const sync_level_t* slot; - slot = sync_thread_levels_get_nth(arr, i); - - if (slot->latch != NULL) { - if (slot->level <= limit) { - - if (!warn) { - - return(FALSE); - } - - lock = slot->latch; - mutex = slot->latch; + slot = &arr->elems[i]; + if (slot->latch != NULL && slot->level <= limit) { + if (warn) { fprintf(stderr, "InnoDB: sync levels should be" " > %lu but a level is %lu\n", (ulong) limit, (ulong) slot->level); - if (mutex->magic_n == MUTEX_MAGIC_N) { - fprintf(stderr, - "Mutex created at %s %lu\n", - mutex->cfile_name, - (ulong) mutex->cline); - - if (mutex_get_lock_word(mutex) != 0) { - const char* file_name; - ulint line; - os_thread_id_t thread_id; - - mutex_get_debug_info( - mutex, &file_name, - &line, &thread_id); - - fprintf(stderr, - "InnoDB: Locked mutex:" - " addr %p thread %ld" - " file %s line %ld\n", - (void*) mutex, - os_thread_pf( - thread_id), - file_name, - (ulong) line); - } else { - fputs("Not locked\n", stderr); - } - } else { - rw_lock_print(lock); - } - - return(FALSE); + sync_print_warning(slot); } + + return(FALSE); } } @@ -942,31 +933,29 @@ sync_thread_levels_g( /******************************************************************//** Checks if the level value is stored in the level array. -@return TRUE if stored */ +@return slot if found or NULL */ static -ibool +const sync_level_t* sync_thread_levels_contain( /*=======================*/ - sync_level_t* arr, /*!< in: pointer to level array for an OS + sync_arr_t* arr, /*!< in: pointer to level array for an OS thread */ ulint level) /*!< in: level */ { - sync_level_t* slot; ulint i; - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + for (i = 0; i < arr->n_elems; i++) { + const sync_level_t* slot; - slot = sync_thread_levels_get_nth(arr, i); + slot = &arr->elems[i]; - if (slot->latch != NULL) { - if (slot->level == level) { + if (slot->latch != NULL && slot->level == level) { - return(TRUE); - } + return(slot); } } - return(FALSE); + return(NULL); } /******************************************************************//** @@ -980,10 +969,9 @@ sync_thread_levels_contains( ulint level) /*!< in: latching order level (SYNC_DICT, ...)*/ { - sync_level_t* arr; - sync_thread_t* thread_slot; - sync_level_t* slot; ulint i; + sync_arr_t* arr; + sync_thread_t* thread_slot; if (!sync_order_checks_on) { @@ -1003,9 +991,10 @@ sync_thread_levels_contains( arr = thread_slot->levels; - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + for (i = 0; i < arr->n_elems; i++) { + sync_level_t* slot; - slot = sync_thread_levels_get_nth(arr, i); + slot = &arr->elems[i]; if (slot->latch != NULL && slot->level == level) { @@ -1031,10 +1020,9 @@ sync_thread_levels_nonempty_gen( also purge_is_running mutex is allowed */ { - sync_level_t* arr; - sync_thread_t* thread_slot; - sync_level_t* slot; ulint i; + sync_arr_t* arr; + sync_thread_t* thread_slot; if (!sync_order_checks_on) { @@ -1054,9 +1042,10 @@ sync_thread_levels_nonempty_gen( arr = thread_slot->levels; - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + for (i = 0; i < arr->n_elems; ++i) { + const sync_level_t* slot; - slot = sync_thread_levels_get_nth(arr, i); + slot = &arr->elems[i]; if (slot->latch != NULL && (!dict_mutex_allowed @@ -1098,10 +1087,10 @@ sync_thread_add_level( ulint level) /*!< in: level in the latching order; if SYNC_LEVEL_VARYING, nothing is done */ { - sync_level_t* array; - sync_level_t* slot; - sync_thread_t* thread_slot; ulint i; + sync_level_t* slot; + sync_arr_t* array; + sync_thread_t* thread_slot; if (!sync_order_checks_on) { @@ -1126,20 +1115,23 @@ sync_thread_add_level( thread_slot = sync_thread_level_arrays_find_slot(); if (thread_slot == NULL) { + ulint sz; + + sz = sizeof(*array) + + (sizeof(*array->elems) * SYNC_THREAD_N_LEVELS); + /* We have to allocate the level array for a new thread */ - array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS); + array = calloc(sz, sizeof(char)); + ut_a(array != NULL); + + array->next_free = ULINT_UNDEFINED; + array->max_elems = SYNC_THREAD_N_LEVELS; + array->elems = (sync_level_t*) &array[1]; thread_slot = sync_thread_level_arrays_find_free(); - thread_slot->id = os_thread_get_curr_id(); thread_slot->levels = array; - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(array, i); - - slot->latch = NULL; - } + thread_slot->id = os_thread_get_curr_id(); } array = thread_slot->levels; @@ -1303,19 +1295,26 @@ sync_thread_add_level( ut_error; } - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + if (array->next_free == ULINT_UNDEFINED) { + ut_a(array->n_elems < array->max_elems); - slot = sync_thread_levels_get_nth(array, i); - - if (slot->latch == NULL) { - slot->latch = latch; - slot->level = level; - - break; - } + i = array->n_elems++; + } else { + i = array->next_free; + array->next_free = array->elems[i].level; } - ut_a(i < SYNC_THREAD_N_LEVELS); + ut_a(i < array->n_elems); + ut_a(i != ULINT_UNDEFINED); + + ++array->in_use; + + slot = &array->elems[i]; + + ut_a(slot->latch == NULL); + + slot->latch = latch; + slot->level = level; mutex_exit(&sync_thread_mutex); } @@ -1331,8 +1330,7 @@ sync_thread_reset_level( /*====================*/ void* latch) /*!< in: pointer to a mutex or an rw-lock */ { - sync_level_t* array; - sync_level_t* slot; + sync_arr_t* array; sync_thread_t* thread_slot; ulint i; @@ -1363,17 +1361,37 @@ sync_thread_reset_level( array = thread_slot->levels; - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { + for (i = 0; i < array->n_elems; i++) { + sync_level_t* slot; - slot = sync_thread_levels_get_nth(array, i); + slot = &array->elems[i]; - if (slot->latch == latch) { - slot->latch = NULL; - - mutex_exit(&sync_thread_mutex); - - return(TRUE); + if (slot->latch != latch) { + continue; } + + slot->latch = NULL; + + /* Update the free slot list. See comment in sync_level_t + for the level field. */ + slot->level = array->next_free; + array->next_free = i; + + ut_a(array->in_use >= 1); + --array->in_use; + + /* If all cells are idle then reset the free + list. The assumption is that this will save + time when we need to scan up to n_elems. */ + + if (array->in_use == 0) { + array->n_elems = 0; + array->next_free = ULINT_UNDEFINED; + } + + mutex_exit(&sync_thread_mutex); + + return(TRUE); } if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) { @@ -1403,11 +1421,6 @@ void sync_init(void) /*===========*/ { -#ifdef UNIV_SYNC_DEBUG - sync_thread_t* thread_slot; - ulint i; -#endif /* UNIV_SYNC_DEBUG */ - ut_a(sync_initialized == FALSE); sync_initialized = TRUE; @@ -1421,13 +1434,10 @@ sync_init(void) /* Create the thread latch level array where the latch levels are stored for each OS thread */ - sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N - * sizeof(sync_thread_t)); - for (i = 0; i < OS_THREAD_MAX_N; i++) { + sync_thread_level_arrays = calloc( + sizeof(sync_thread_t), OS_THREAD_MAX_N); + ut_a(sync_thread_level_arrays != NULL); - thread_slot = sync_thread_level_arrays_get_nth(i); - thread_slot->levels = NULL; - } #endif /* UNIV_SYNC_DEBUG */ /* Init the mutex list and create the mutex to protect it. */ @@ -1454,6 +1464,34 @@ sync_init(void) #endif /* UNIV_SYNC_DEBUG */ } +#ifdef UNIV_SYNC_DEBUG +/******************************************************************//** +Frees all debug memory. */ +static +void +sync_thread_level_arrays_free(void) +/*===============================*/ + +{ + ulint i; + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + sync_thread_t* slot; + + slot = &sync_thread_level_arrays[i]; + + /* If this slot was allocated then free the slot memory too. */ + if (slot->levels != NULL) { + free(slot->levels); + slot->levels = NULL; + } + } + + free(sync_thread_level_arrays); + sync_thread_level_arrays = NULL; +} +#endif /* UNIV_SYNC_DEBUG */ + /******************************************************************//** Frees the resources in InnoDB's own synchronization data structures. Use os_sync_free() after calling this. */ @@ -1466,17 +1504,20 @@ sync_close(void) sync_array_free(sync_primary_wait_array); - mutex = UT_LIST_GET_FIRST(mutex_list); + for (mutex = UT_LIST_GET_FIRST(mutex_list); + mutex != NULL; + /* No op */) { - while (mutex) { #ifdef UNIV_MEM_DEBUG if (mutex == &mem_hash_mutex) { mutex = UT_LIST_GET_NEXT(list, mutex); continue; } #endif /* UNIV_MEM_DEBUG */ + mutex_free(mutex); - mutex = UT_LIST_GET_FIRST(mutex_list); + + mutex = UT_LIST_GET_FIRST(mutex_list); } mutex_free(&mutex_list_mutex); @@ -1485,6 +1526,8 @@ sync_close(void) /* Switch latching order checks on in sync0sync.c */ sync_order_checks_on = FALSE; + + sync_thread_level_arrays_free(); #endif /* UNIV_SYNC_DEBUG */ sync_initialized = FALSE; diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index 7f2ca1f1471..4bbcee210b0 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -2004,18 +2004,18 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t* trx_get_trx_by_xid( /*===============*/ - XID* xid) /*!< in: X/Open XA transaction identification */ + const XID* xid) /*!< in: X/Open XA transaction identifier */ { trx_t* trx; if (xid == NULL) { - return (NULL); + return(NULL); } mutex_enter(&kernel_mutex); @@ -2028,10 +2028,16 @@ trx_get_trx_by_xid( of gtrid_length+bqual_length bytes should be the same */ - if (xid->gtrid_length == trx->xid.gtrid_length + if (trx->conc_state == TRX_PREPARED + && xid->gtrid_length == trx->xid.gtrid_length && xid->bqual_length == trx->xid.bqual_length && memcmp(xid->data, trx->xid.data, xid->gtrid_length + xid->bqual_length) == 0) { + + /* Invalidate the XID, so that subsequent calls + will not find it. */ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; break; } @@ -2040,14 +2046,5 @@ trx_get_trx_by_xid( mutex_exit(&kernel_mutex); - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } + return(trx); }