diff --git a/mysql-test/suite/innodb/t/innodb-zip.test b/mysql-test/suite/innodb/t/innodb-zip.test index eb517563416..8ba83517b44 100644 --- a/mysql-test/suite/innodb/t/innodb-zip.test +++ b/mysql-test/suite/innodb/t/innodb-zip.test @@ -85,7 +85,8 @@ SELECT table_schema, table_name, row_format FROM information_schema.tables WHERE engine='innodb'; drop table t1,t2; -# The following should fail even in non-strict mode. +# The following should fail in non-strict mode too. +# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) SET SESSION innodb_strict_mode = off; --error ER_TOO_BIG_ROWSIZE CREATE TABLE t1( diff --git a/mysql-test/suite/innodb/t/innodb_bug36169.test b/mysql-test/suite/innodb/t/innodb_bug36169.test index 5bf55193b5c..5bbbf45d484 100644 --- a/mysql-test/suite/innodb/t/innodb_bug36169.test +++ b/mysql-test/suite/innodb/t/innodb_bug36169.test @@ -24,6 +24,7 @@ SET GLOBAL innodb_file_per_table=ON; # Generating 10 tables # Creating a table with 94 columns and 24 indexes DROP TABLE IF EXISTS `table0`; +set innodb_strict_mode=on; --error ER_TOO_BIG_ROWSIZE CREATE TABLE IF NOT EXISTS `table0` (`col0` BOOL, diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index 6cc9b48936a..97a6bf859c9 100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -1455,11 +1455,11 @@ Calculates a split record such that the tuple will certainly fit on its half-page when the split is performed. We assume in this function only that the cursor page has at least one user record. 
@return split record, or NULL if tuple will be the first record on -upper half-page */ +the lower or upper half-page (determined by btr_page_tuple_smaller()) */ static rec_t* -btr_page_get_sure_split_rec( -/*========================*/ +btr_page_get_split_rec( +/*===================*/ btr_cur_t* cursor, /*!< in: cursor at which insert should be made */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext) /*!< in: number of externally stored columns */ @@ -1835,6 +1835,37 @@ btr_attach_half_pages( btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr); } +/*************************************************************//** +Determine if a tuple is smaller than any record on the page. +@return TRUE if smaller */ +static +ibool +btr_page_tuple_smaller( +/*===================*/ + btr_cur_t* cursor, /*!< in: b-tree cursor */ + const dtuple_t* tuple, /*!< in: tuple to consider */ + ulint* offsets,/*!< in/out: temporary storage */ + ulint n_uniq, /*!< in: number of unique fields + in the index page records */ + mem_heap_t** heap) /*!< in/out: heap for offsets */ +{ + buf_block_t* block; + const rec_t* first_rec; + page_cur_t pcur; + + /* Read the first user record in the page. */ + block = btr_cur_get_block(cursor); + page_cur_set_before_first(block, &pcur); + page_cur_move_to_next(&pcur); + first_rec = page_cur_get_rec(&pcur); + + offsets = rec_get_offsets( + first_rec, cursor->index, offsets, + n_uniq, heap); + + return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0); +} + /*************************************************************//** Splits an index page to halves and inserts the tuple. It is assumed that mtr holds an x-latch to the index tree. 
NOTE: the tree x-latch is @@ -1909,49 +1940,45 @@ func_start: if (n_iterations > 0) { direction = FSP_UP; hint_page_no = page_no + 1; - split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext); + split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); + if (UNIV_UNLIKELY(split_rec == NULL)) { + insert_left = btr_page_tuple_smaller( + cursor, tuple, offsets, n_uniq, &heap); + } } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { direction = FSP_UP; hint_page_no = page_no + 1; + insert_left = FALSE; } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { direction = FSP_DOWN; hint_page_no = page_no - 1; + ut_ad(split_rec); } else { direction = FSP_UP; hint_page_no = page_no + 1; - if (page_get_n_recs(page) == 1) { - page_cur_t pcur; + /* If there is only one record in the index page, we + can't split the node in the middle by default. We need + to determine whether the new record will be inserted + to the left or right. */ - /* There is only one record in the index page - therefore we can't split the node in the middle - by default. We need to determine whether the - new record will be inserted to the left or right. */ - - /* Read the first (and only) record in the page. */ - page_cur_set_before_first(block, &pcur); - page_cur_move_to_next(&pcur); - first_rec = page_cur_get_rec(&pcur); - - offsets = rec_get_offsets( - first_rec, cursor->index, offsets, - n_uniq, &heap); - - /* If the new record is less than the existing record - the split in the middle will copy the existing - record to the new node. 
*/ - if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) { - split_rec = page_get_middle_rec(page); - } else { - split_rec = NULL; - } - } else { + if (page_get_n_recs(page) > 1) { split_rec = page_get_middle_rec(page); + } else if (btr_page_tuple_smaller(cursor, tuple, + offsets, n_uniq, &heap)) { + split_rec = page_rec_get_next( + page_get_infimum_rec(page)); + } else { + split_rec = NULL; + insert_left = FALSE; } } + /* At this point, insert_left is initialized if split_rec == NULL + and may be uninitialized otherwise. */ + /* 2. Allocate a new page to the index */ new_block = btr_page_alloc(cursor->index, hint_page_no, direction, btr_page_get_level(page, mtr), mtr); @@ -1978,11 +2005,11 @@ func_start: avoid further splits by inserting the record to an empty page. */ split_rec = NULL; - goto insert_right; + goto insert_empty; } } else { -insert_right: - insert_left = FALSE; +insert_empty: + ut_ad(!split_rec); buf = mem_alloc(rec_get_converted_size(cursor->index, tuple, n_ext)); @@ -2019,7 +2046,17 @@ insert_right: } /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN) { + if (direction == FSP_DOWN +#ifdef UNIV_BTR_AVOID_COPY + && page_rec_is_supremum(move_limit)) { + /* Instead of moving all records, make the new page + the empty page. */ + + left_block = block; + right_block = new_block; + } else if (direction == FSP_DOWN +#endif /* UNIV_BTR_AVOID_COPY */ + ) { /* fputs("Split left\n", stderr); */ if (0 @@ -2062,6 +2099,14 @@ insert_right: right_block = block; lock_update_split_left(right_block, left_block); +#ifdef UNIV_BTR_AVOID_COPY + } else if (!split_rec) { + /* Instead of moving all records, make the new page + the empty page. 
*/ + + left_block = new_block; + right_block = block; +#endif /* UNIV_BTR_AVOID_COPY */ } else { /* fputs("Split right\n", stderr); */ diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 57d6973f623..9293fc151ae 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -1184,7 +1184,6 @@ btr_cur_optimistic_insert( ibool inherit; ulint zip_size; ulint rec_size; - mem_heap_t* heap = NULL; ulint err; *big_rec = NULL; @@ -1264,10 +1263,6 @@ btr_cur_optimistic_insert( index, entry, big_rec_vec); } - if (heap) { - mem_heap_free(heap); - } - return(DB_TOO_BIG_RECORD); } } @@ -1290,15 +1285,11 @@ fail_err: dtuple_convert_back_big_rec(index, entry, big_rec_vec); } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); } if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT - || max_size < rec_size) + || max_size < rec_size) && UNIV_LIKELY(page_get_n_recs(page) > 1) && page_get_max_insert_size(page, 1) < rec_size) { @@ -1364,10 +1355,6 @@ fail_err: } } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - #ifdef BTR_CUR_HASH_ADAPT if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) { btr_search_update_hash_node_on_insert(cursor); diff --git a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c index e3c1f1b4f23..0715b49bf9c 100644 --- a/storage/innobase/data/data0data.c +++ b/storage/innobase/data/data0data.c @@ -666,6 +666,21 @@ dtuple_convert_big_rec( goto skip_field; } + /* In DYNAMIC and COMPRESSED format, store + locally any non-BLOB columns whose maximum + length does not exceed 255 bytes. This is + because there is no room for the "external + storage" flag when the maximum length is 255 + bytes or less. This restriction trivially + holds in REDUNDANT and COMPACT format, because + there we always store locally columns whose + length is up to local_len == 788 bytes. 
+ @see rec_init_offsets_comp_ordinary */ + if (ifield->col->mtype != DATA_BLOB + && ifield->col->len < 256) { + goto skip_field; + } + longest_i = i; longest = savings; diff --git a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c index 70b5bfa99f7..45d57b8c619 100644 --- a/storage/innobase/dict/dict0boot.c +++ b/storage/innobase/dict/dict0boot.c @@ -368,8 +368,8 @@ dict_boot(void) #if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2 #error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2" #endif -#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2 -#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2" +#if DICT_SYS_INDEXES_NAME_FIELD != 2 + 2 +#error "DICT_SYS_INDEXES_NAME_FIELD != 2 + 2" #endif table->id = DICT_INDEXES_ID; diff --git a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c index 4ba7cd8a48c..653bff4bef6 100644 --- a/storage/innobase/dict/dict0crea.c +++ b/storage/innobase/dict/dict0crea.c @@ -1105,8 +1105,11 @@ dict_create_index_step( dulint index_id = node->index->id; - err = dict_index_add_to_cache(node->table, node->index, - FIL_NULL, TRUE); + err = dict_index_add_to_cache( + node->table, node->index, FIL_NULL, + trx_is_strict(trx) + || dict_table_get_format(node->table) + >= DICT_TF_FORMAT_ZIP); node->index = dict_index_get_if_in_cache_low(index_id); ut_a(!node->index == (err != DB_SUCCESS)); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index ba251dc427d..0070cf904fc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1970,6 +1970,19 @@ trx_is_interrupted( return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd)); } +/**********************************************************************//** +Determines if the currently running transaction is in strict mode. 
+@return TRUE if strict */ +extern "C" UNIV_INTERN +ibool +trx_is_strict( +/*==========*/ + trx_t* trx) /*!< in: transaction */ +{ + return(trx && trx->mysql_thd + && THDVAR((THD*) trx->mysql_thd, strict_mode)); +} + /**************************************************************//** Resets some fields of a prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 32d9b9f0586..3244080c3be 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -922,9 +922,8 @@ convert_error: trx_commit_for_mysql(prebuilt->trx); } - ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE)); - if (dict_locked) { + ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE)); row_mysql_unlock_data_dictionary(trx); } diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index e01fafe652d..1a13bd1503a 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -137,7 +137,7 @@ clustered index */ #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 #define DICT_SYS_INDEXES_TYPE_FIELD 6 -#define DICT_SYS_INDEXES_NAME_FIELD 3 +#define DICT_SYS_INDEXES_NAME_FIELD 4 /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 480f265a138..6872fb463c0 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -391,6 +391,14 @@ ibool trx_is_interrupted( /*===============*/ trx_t* trx); /*!< in: transaction */ +/**********************************************************************//** +Determines if the currently running transaction is in strict mode. 
+@return TRUE if strict */ +UNIV_INTERN +ibool +trx_is_strict( +/*==========*/ + trx_t* trx); /*!< in: transaction */ #else /* !UNIV_HOTBACKUP */ #define trx_is_interrupted(trx) FALSE #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index a3002569ea5..927f237de0b 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -182,6 +182,9 @@ command. Not tested on Windows. */ #define UNIV_COMPILE_TEST_FUNCS */ +#ifdef HAVE_purify +# define UNIV_DEBUG_VALGRIND +#endif /* HAVE_purify */ #if 0 #define UNIV_DEBUG_VALGRIND /* Enable extra Valgrind instrumentation */ @@ -219,6 +222,10 @@ operations (very slow); also UNIV_DEBUG must be defined */ adaptive hash index */ #define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output in sync0sync.c */ +#define UNIV_BTR_AVOID_COPY /* when splitting B-tree nodes, + do not move any records when + all the records would + be moved */ #define UNIV_BTR_PRINT /* enable functions for printing B-trees */ #define UNIV_ZIP_DEBUG /* extensive consistency checks diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index 27c11dacc8c..37ba8ca2ffe 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -212,6 +212,13 @@ rec_get_n_extern_new( const dict_col_t* col = dict_field_get_col(field); len = *lens--; + /* If the maximum length of the field is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the field is stored externally. 
*/ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { if (len & 0x80) { @@ -294,6 +301,13 @@ rec_init_offsets_comp_ordinary( const dict_col_t* col = dict_field_get_col(field); len = *lens--; + /* If the maximum length of the field is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the field is stored externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { @@ -425,6 +439,15 @@ rec_init_offsets( const dict_col_t* col = dict_field_get_col(field); len = *lens--; + /* If the maximum length of the field + is up to 255 bytes, the actual length + is always stored in one byte. If the + maximum length is more than 255 bytes, + the actual length is stored in one + byte for 0..127. The length will be + encoded in two bytes when it is 128 or + more, or when the field is stored + externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { @@ -647,6 +670,13 @@ rec_get_offsets_reverse( const dict_col_t* col = dict_field_get_col(field); len = *lens++; + /* If the maximum length of the field is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the field is stored externally. */ if (UNIV_UNLIKELY(col->len > 255) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { if (len & 0x80) { @@ -781,12 +811,20 @@ rec_get_converted_size_comp_prefix( ut_ad(len <= col->len || col->mtype == DATA_BLOB); + /* If the maximum length of a variable-length field + is up to 255 bytes, the actual length is always stored + in one byte. 
If the maximum length is more than 255 + bytes, the actual length is stored in one byte for + 0..127. The length will be encoded in two bytes when + it is 128 or more, or when the field is stored externally. */ + if (field->fixed_len) { ut_ad(len == field->fixed_len); /* dict_index_add_col() should guarantee this */ ut_ad(!field->prefix_len || field->fixed_len == field->prefix_len); } else if (dfield_is_ext(&fields[i])) { + ut_ad(col->len >= 256 || col->mtype == DATA_BLOB); extra_size += 2; } else if (len < 128 || (col->len < 256 && col->mtype != DATA_BLOB)) { @@ -1086,6 +1124,8 @@ rec_convert_dtuple_to_rec_comp( /* Store the data and the offsets */ for (i = 0, field = fields; i < n_fields; i++, field++) { + const dict_field_t* ifield; + type = dfield_get_type(field); len = dfield_get_len(field); @@ -1120,12 +1160,20 @@ rec_convert_dtuple_to_rec_comp( /* only nullable fields can be null */ ut_ad(!dfield_is_null(field)); - fixed_len = dict_index_get_nth_field(index, i)->fixed_len; - + ifield = dict_index_get_nth_field(index, i); + fixed_len = ifield->fixed_len; + /* If the maximum length of a variable-length field + is up to 255 bytes, the actual length is always stored + in one byte. If the maximum length is more than 255 + bytes, the actual length is stored in one byte for + 0..127. The length will be encoded in two bytes when + it is 128 or more, or when the field is stored externally. 
*/ if (fixed_len) { ut_ad(len == fixed_len); ut_ad(!dfield_is_ext(field)); } else if (dfield_is_ext(field)) { + ut_ad(ifield->col->len >= 256 + || ifield->col->mtype == DATA_BLOB); ut_ad(len <= REC_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE); *lens-- = (byte) (len >> 8) | 0xc0; @@ -1215,11 +1263,20 @@ rec_convert_dtuple_to_rec( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; const ulint* offsets; + ulint i; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap); ut_ad(rec_validate(rec, offsets)); + ut_ad(dtuple_get_n_fields(dtuple) + == rec_offs_n_fields(offsets)); + + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i)) + == !rec_offs_nth_extern(offsets, i)); + } + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1402,6 +1459,13 @@ rec_copy_prefix_to_buf( prefix_len += field->fixed_len; } else { ulint len = *lens--; + /* If the maximum length of the column is up + to 255 bytes, the actual length is always + stored in one byte. If the maximum length is + more than 255 bytes, the actual length is + stored in one byte for 0..127. The length + will be encoded in two bytes when it is 128 or + more, or when the column is stored externally. */ if (col->len > 255 || col->mtype == DATA_BLOB) { if (len & 0x80) { /* 1exxxxxx */