diff --git a/mysql-test/suite/innodb/r/innodb_bug56680.result b/mysql-test/suite/innodb/r/innodb_bug56680.result new file mode 100644 index 00000000000..c3afc3e6581 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug56680.result @@ -0,0 +1,102 @@ +SET GLOBAL tx_isolation='REPEATABLE-READ'; +CREATE TABLE bug56680( +a INT AUTO_INCREMENT PRIMARY KEY, +b CHAR(1), +c INT, +INDEX(b)) +ENGINE=InnoDB; +INSERT INTO bug56680 VALUES(0,'x',1); +BEGIN; +SELECT b FROM bug56680; +b +x +BEGIN; +UPDATE bug56680 SET b='X'; +SELECT b FROM bug56680; +b +x +SELECT * FROM bug56680; +a b c +1 x 1 +ROLLBACK; +SELECT b FROM bug56680; +b +x +SET GLOBAL tx_isolation='READ-UNCOMMITTED'; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +BEGIN; +SELECT b FROM bug56680 LIMIT 2; +b +x +x +BEGIN; +DELETE FROM bug56680 WHERE a=1; +INSERT INTO bug56680 VALUES(1,'X',1); +SELECT b FROM bug56680 LIMIT 3; +b +X +x +x +SELECT b FROM bug56680 LIMIT 2; +b +x +x +CHECK TABLE bug56680; +Table Op Msg_type Msg_text +test.bug56680 check status OK +ROLLBACK; +SELECT b FROM bug56680 LIMIT 2; +b +x +x +CHECK TABLE bug56680; +Table Op Msg_type Msg_text +test.bug56680 check status OK +SELECT b FROM bug56680 LIMIT 2; +b +x +x +CREATE TABLE bug56680_2( +a INT AUTO_INCREMENT PRIMARY KEY, +b VARCHAR(2) CHARSET latin1 COLLATE latin1_german2_ci, +c INT, +INDEX(b)) +ENGINE=InnoDB; +INSERT INTO bug56680_2 SELECT 0,_latin1 0xdf,c FROM bug56680; +BEGIN; +SELECT HEX(b) FROM bug56680_2 LIMIT 2; +HEX(b) +DF +DF +DELETE FROM bug56680_2 WHERE a=1; +INSERT INTO bug56680_2 VALUES(1,'SS',1); +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +HEX(b) +5353 +DF +DF +CHECK TABLE bug56680_2; +Table Op Msg_type Msg_text +test.bug56680_2 check status OK +DELETE FROM bug56680_2 WHERE a=1; +INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1); +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +HEX(b) +DF +DF +DF +CHECK TABLE bug56680_2; +Table Op Msg_type Msg_text +test.bug56680_2 check status OK +DROP TABLE bug56680_2; +DROP TABLE bug56680; diff --git a/mysql-test/suite/innodb/t/innodb_bug56680.test b/mysql-test/suite/innodb/t/innodb_bug56680.test new file mode 100644 index 00000000000..ab81ca2f8e6 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug56680.test @@ -0,0 +1,130 @@ +# +# Bug #56680 InnoDB may return wrong results from a case-insensitive index +# +-- source include/have_innodb.inc + +-- disable_query_log +SET @tx_isolation_orig = @@tx_isolation; +# The flag innodb_change_buffering_debug is only available in debug builds. +# It instructs InnoDB to try to evict pages from the buffer pool when +# change buffering is possible, so that the change buffer will be used +# whenever possible. +-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; +-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +SET GLOBAL innodb_change_buffering_debug = 1; +-- enable_query_log +SET GLOBAL tx_isolation='REPEATABLE-READ'; + +CREATE TABLE bug56680( + a INT AUTO_INCREMENT PRIMARY KEY, + b CHAR(1), + c INT, + INDEX(b)) +ENGINE=InnoDB; + +INSERT INTO bug56680 VALUES(0,'x',1); +BEGIN; +SELECT b FROM bug56680; + +connect (con1,localhost,root,,); +connection con1; +BEGIN; +UPDATE bug56680 SET b='X'; + +connection default; +# This should return the last committed value 'x', but would return 'X' +# due to a bug in row_search_for_mysql(). +SELECT b FROM bug56680; +# This would always return the last committed value 'x'. +SELECT * FROM bug56680; + +connection con1; +ROLLBACK; +disconnect con1; + +connection default; + +SELECT b FROM bug56680; + +# For the rest of this test, use the READ UNCOMMITTED isolation level +# to see what exists in the secondary index. +SET GLOBAL tx_isolation='READ-UNCOMMITTED'; + +# Create enough rows for the table, so that the insert buffer will be +# used for modifying the secondary index page. There must be multiple +# index pages, because changes to the root page are never buffered. + +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; + +BEGIN; +SELECT b FROM bug56680 LIMIT 2; + +connect (con1,localhost,root,,); +connection con1; +BEGIN; +DELETE FROM bug56680 WHERE a=1; +# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. +INSERT INTO bug56680 VALUES(1,'X',1); + +# This should force an insert buffer merge, and return 'X' in the first row. +SELECT b FROM bug56680 LIMIT 3; + +connection default; +SELECT b FROM bug56680 LIMIT 2; +CHECK TABLE bug56680; + +connection con1; +ROLLBACK; +SELECT b FROM bug56680 LIMIT 2; +CHECK TABLE bug56680; + +connection default; +disconnect con1; + +SELECT b FROM bug56680 LIMIT 2; + +CREATE TABLE bug56680_2( + a INT AUTO_INCREMENT PRIMARY KEY, + b VARCHAR(2) CHARSET latin1 COLLATE latin1_german2_ci, + c INT, + INDEX(b)) +ENGINE=InnoDB; + +INSERT INTO bug56680_2 SELECT 0,_latin1 0xdf,c FROM bug56680; + +BEGIN; +SELECT HEX(b) FROM bug56680_2 LIMIT 2; +DELETE FROM bug56680_2 WHERE a=1; +# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. +INSERT INTO bug56680_2 VALUES(1,'SS',1); + +# This should force an insert buffer merge, and return 'SS' in the first row. +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +CHECK TABLE bug56680_2; + +DELETE FROM bug56680_2 WHERE a=1; +# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. +INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1); + +# This should force an insert buffer merge, and return 0xdf in the first row. +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +CHECK TABLE bug56680_2; + +DROP TABLE bug56680_2; +DROP TABLE bug56680; + +-- disable_query_log +SET GLOBAL tx_isolation = @tx_isolation_orig; +-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE +SET GLOBAL innodb_change_buffering_debug = @innodb_change_buffering_debug_orig; diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index 45867388a61..500088c3901 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -1270,6 +1270,30 @@ loop: buf_awe_map_page_to_frame(block, TRUE); } +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + if (mode == BUF_GET_IF_IN_POOL && ibuf_debug) { + /* Try to evict the block from the buffer pool, to use the + insert buffer as much as possible. */ + + if (buf_LRU_free_block(block)) { + mutex_exit(&buf_pool->mutex); + mutex_exit(&block->mutex); + fprintf(stderr, + "innodb_change_buffering_debug evict %u %u\n", + (unsigned) space, (unsigned) offset); + return(NULL); + } else if (buf_flush_page_try(block)) { + fprintf(stderr, + "innodb_change_buffering_debug flush %u %u\n", + (unsigned) space, (unsigned) offset); + guess = block->frame; + goto loop; + } + + /* Failed to evict the page; change it directly */ + } +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + #ifdef UNIV_SYNC_DEBUG buf_block_buf_fix_inc_debug(block, file, line); #else diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c index 24fa306c127..7dd6bfd9198 100644 --- a/storage/innobase/buf/buf0flu.c +++ b/storage/innobase/buf/buf0flu.c @@ -723,6 +723,82 @@ buf_flush_try_page( return(0); } +# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/********************************************************************** +Writes a flushable page asynchronously from the buffer pool to a file. +NOTE: buf_pool_mutex and block->mutex must be held upon entering this +function, and they will be released by this function after flushing. +This is loosely based on buf_flush_batch() and buf_flush_try_page(). */ + +ibool +buf_flush_page_try( +/*===============*/ + /* out: TRUE if flushed and + mutexes released */ + buf_block_t* block) /*!< in/out: buffer control block */ +{ + ut_ad(mutex_own(&buf_pool->mutex)); + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + ut_ad(mutex_own(&block->mutex)); + + if (!buf_flush_ready_for_flush(block, BUF_FLUSH_LRU)) { + return(FALSE); + } + + if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0 + || buf_pool->init_flush[BUF_FLUSH_LRU]) { + /* There is already a flush batch of the same type running */ + return(FALSE); + } + + buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE; + + block->io_fix = BUF_IO_WRITE; + block->flush_type = BUF_FLUSH_LRU; + + if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) { + + os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]); + } + + /* VERY IMPORTANT: + Because any thread may call the LRU flush, even when owning + locks on pages, to avoid deadlocks, we must make sure that the + s-lock is acquired on the page without waiting: this is + accomplished because buf_flush_ready_for_flush() must hold, + and that requires the page not to be bufferfixed. */ + + rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE); + + /* Note that the s-latch is acquired before releasing the + buf_pool mutex: this ensures that the latch is acquired + immediately. */ + + mutex_exit(&block->mutex); + mutex_exit(&buf_pool->mutex); + + /* Even though block is not protected by any mutex at this + point, it is safe to access block, because it is io_fixed and + oldest_modification != 0. Thus, it cannot be relocated in the + buffer pool or removed from flush_list or LRU_list. */ + + buf_flush_write_block_low(block); + + mutex_enter(&buf_pool->mutex); + buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE; + + if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) { + /* The running flush batch has ended */ + os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]); + } + + mutex_exit(&buf_pool->mutex); + buf_flush_buffered_writes(); + + return(TRUE); +} +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + /*************************************************************** Flushes to disk all flushable pages within the flush area. */ static diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c index d3c787d1578..1dc3efd1464 100644 --- a/storage/innobase/buf/buf0lru.c +++ b/storage/innobase/buf/buf0lru.c @@ -320,6 +320,60 @@ buf_LRU_get_recent_limit(void) return(limit); } +/********************************************************************** +Try to put a block from the LRU list to the free list. */ + +ibool +buf_LRU_free_block( +/*===============*/ + /* out: TRUE if freed */ + buf_block_t* block) /* in/out: block to be freed */ +{ + if (!buf_flush_ready_for_replace(block)) { + return(FALSE); + } + +#ifdef UNIV_DEBUG + if (buf_debug_prints) { + fprintf(stderr, + "Putting space %lu page %lu" + " to free list\n", + (ulong) block->space, + (ulong) block->offset); + } +#endif /* UNIV_DEBUG */ + + buf_LRU_block_remove_hashed_page(block); + + mutex_exit(&(buf_pool->mutex)); + mutex_exit(&block->mutex); + + /* Remove possible adaptive hash index built on the + page; in the case of AWE the block may not have a + frame at all */ + + if (block->frame) { + /* The page was declared uninitialized + by buf_LRU_block_remove_hashed_page(). + We need to flag the contents of the + page valid (which it still is) in + order to avoid bogus Valgrind + warnings. */ + UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); + btr_search_drop_page_hash_index(block->frame); + UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); + } + + ut_a(block->buf_fix_count == 0); + + mutex_enter(&(buf_pool->mutex)); + mutex_enter(&block->mutex); + + buf_LRU_block_free_hashed_page(block); + + return(TRUE); +} + /********************************************************************** Look for a replaceable block from the end of the LRU list and put it to the free list if found. */ @@ -348,54 +402,13 @@ buf_LRU_search_and_free_block( ut_a(block->in_LRU_list); mutex_enter(&block->mutex); + freed = buf_LRU_free_block(block); + mutex_exit(&block->mutex); - if (buf_flush_ready_for_replace(block)) { - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Putting space %lu page %lu" - " to free list\n", - (ulong) block->space, - (ulong) block->offset); - } -#endif /* UNIV_DEBUG */ - - buf_LRU_block_remove_hashed_page(block); - - mutex_exit(&(buf_pool->mutex)); - mutex_exit(&block->mutex); - - /* Remove possible adaptive hash index built on the - page; in the case of AWE the block may not have a - frame at all */ - - if (block->frame) { - /* The page was declared uninitialized - by buf_LRU_block_remove_hashed_page(). - We need to flag the contents of the - page valid (which it still is) in - order to avoid bogus Valgrind - warnings. */ - UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); - btr_search_drop_page_hash_index(block->frame); - UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); - } - - ut_a(block->buf_fix_count == 0); - - mutex_enter(&(buf_pool->mutex)); - mutex_enter(&block->mutex); - - buf_LRU_block_free_hashed_page(block); - freed = TRUE; - mutex_exit(&block->mutex); - + if (freed) { break; } - mutex_exit(&block->mutex); - block = UT_LIST_GET_PREV(LRU, block); distance++; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 5a8f5479223..4c52326a58a 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -79,6 +79,7 @@ extern "C" { #include "../storage/innobase/include/dict0crea.h" #include "../storage/innobase/include/btr0cur.h" #include "../storage/innobase/include/btr0btr.h" +#include "../storage/innobase/include/ibuf0ibuf.h" #include "../storage/innobase/include/fsp0fsp.h" #include "../storage/innobase/include/sync0sync.h" #include "../storage/innobase/include/fil0fil.h" @@ -3723,17 +3724,18 @@ include_field: n_requested_fields++; templ->col_no = i; + templ->clust_rec_field_no = dict_col_get_clust_pos_noninline( + &index->table->cols[i], clust_index); + ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED); if (index == clust_index) { - templ->rec_field_no = dict_col_get_clust_pos_noninline( - &index->table->cols[i], index); + templ->rec_field_no = templ->clust_rec_field_no; } else { templ->rec_field_no = dict_index_get_nth_col_pos( index, i); - } - - if (templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; + if (templ->rec_field_no == ULINT_UNDEFINED) { + prebuilt->need_to_access_clustered = TRUE; + } } if (field->null_ptr) { @@ -3785,9 +3787,7 @@ skip_field: for (i = 0; i < n_requested_fields; i++) { templ = prebuilt->mysql_template + i; - templ->rec_field_no = dict_col_get_clust_pos_noninline( - &index->table->cols[templ->col_no], - clust_index); + templ->rec_field_no = templ->clust_rec_field_no; } } } @@ -8990,6 +8990,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ AUTOINC_NO_LOCKING, 0); /* Maximum value */ +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, + PLUGIN_VAR_RQCMDARG, + "Debug flags for InnoDB change buffering (0=none)", + NULL, NULL, 0, 0, 1, 0); +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), @@ -9031,6 +9038,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(thread_concurrency), MYSQL_SYSVAR(thread_sleep_delay), MYSQL_SYSVAR(autoinc_lock_mode), +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + MYSQL_SYSVAR(change_buffering_debug), +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ NULL }; diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index d54a3378993..71ecc7ec49f 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -22,6 +22,7 @@ Created 7/19/1997 Heikki Tuuri #include "btr0cur.h" #include "btr0pcur.h" #include "btr0btr.h" +#include "row0upd.h" #include "sync0sync.h" #include "dict0boot.h" #include "fut0lst.h" @@ -137,6 +138,11 @@ access order rules. */ /* Buffer pool size per the maximum insert buffer size */ #define IBUF_POOL_SIZE_PER_MAX_SIZE 2 +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/* Flag to control insert buffer debugging. */ +uint ibuf_debug; +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + /* The insert buffer control structure */ ibuf_t* ibuf = NULL; @@ -2819,6 +2825,72 @@ ibuf_insert( } } +/************************************************************************ +During merge, inserts to an index page a secondary index entry extracted +from the insert buffer. */ +static +void +ibuf_insert_to_index_page_low( +/*==========================*/ + dtuple_t* entry, /* in: buffered entry to insert */ + page_t* page, /* in: index page where the buffered entry + should be placed */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr, /* in: mtr */ + page_cur_t* page_cur)/* in: cursor positioned on the record + after which to insert the buffered entry */ +{ + ulint space; + ulint page_no; + page_t* bitmap_page; + ulint old_bits; + + if (UNIV_LIKELY + (page_cur_tuple_insert(page_cur, entry, index, mtr) != NULL)) { + return; + } + + /* If the record did not fit, reorganize */ + + btr_page_reorganize(page, index, mtr); + + page_cur_search(page, index, entry, PAGE_CUR_LE, page_cur); + + /* This time the record must fit */ + + if (UNIV_LIKELY + (page_cur_tuple_insert(page_cur, entry, index, mtr) != NULL)) { + return; + } + + ut_print_timestamp(stderr); + + fprintf(stderr, + " InnoDB: Error: Insert buffer insert fails;" + " page free %lu, dtuple size %lu\n", + (ulong) page_get_max_insert_size(page, 1), + (ulong) rec_get_converted_size(index, entry)); + fputs("InnoDB: Cannot insert index record ", stderr); + dtuple_print(stderr, entry); + fputs("\nInnoDB: The table where this index record belongs\n" + "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n" + "InnoDB: that table.\n", stderr); + + space = buf_frame_get_space_id(page); + page_no = buf_frame_get_page_no(page); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr); + old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, + IBUF_BITMAP_FREE, mtr); + + fprintf(stderr, + "InnoDB: space %lu, page %lu, bitmap bits %lu\n", + (ulong) space, (ulong) page_no, (ulong) old_bits); + + fputs("InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", stderr); +} + /************************************************************************ During merge, inserts to an index page a secondary index entry extracted from the insert buffer. */ @@ -2835,11 +2907,10 @@ ibuf_insert_to_index_page( page_cur_t page_cur; ulint low_match; rec_t* rec; - page_t* bitmap_page; - ulint old_bits; ut_ad(ibuf_inside()); ut_ad(dtuple_check_typed(entry)); + ut_ad(!buf_block_align(page)->is_hashed); if (UNIV_UNLIKELY(dict_table_is_comp(index->table) != (ibool)!!page_is_comp(page))) { @@ -2877,61 +2948,79 @@ dump: low_match = page_cur_search(page, index, entry, PAGE_CUR_LE, &page_cur); - if (low_match == dtuple_get_n_fields(entry)) { + if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) { + mem_heap_t* heap; + upd_t* update; + ulint* offsets; + rec = page_cur_get_rec(&page_cur); - btr_cur_del_unmark_for_ibuf(rec, mtr); - } else { - rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); + /* This is based on + row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */ + ut_ad(rec_get_deleted_flag(rec, page_is_comp(page))); - if (rec == NULL) { - /* If the record did not fit, reorganize */ + heap = mem_heap_create(1024); - btr_page_reorganize(page, index, mtr); + offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, + &heap); + update = row_upd_build_sec_rec_difference_binary( + index, entry, rec, NULL, heap); - page_cur_search(page, index, entry, - PAGE_CUR_LE, &page_cur); - - /* This time the record must fit */ - if (UNIV_UNLIKELY(!page_cur_tuple_insert( - &page_cur, entry, index, - mtr))) { - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: Insert buffer insert" - " fails; page free %lu," - " dtuple size %lu\n", - (ulong) page_get_max_insert_size( - page, 1), - (ulong) rec_get_converted_size( - index, entry)); - fputs("InnoDB: Cannot insert index record ", - stderr); - dtuple_print(stderr, entry); - fputs("\nInnoDB: The table where" - " this index record belongs\n" - "InnoDB: is now probably corrupt." - " Please run CHECK TABLE on\n" - "InnoDB: that table.\n", stderr); - - bitmap_page = ibuf_bitmap_get_map_page( - buf_frame_get_space_id(page), - buf_frame_get_page_no(page), - mtr); - old_bits = ibuf_bitmap_page_get_bits( - bitmap_page, - buf_frame_get_page_no(page), - IBUF_BITMAP_FREE, mtr); - - fprintf(stderr, "InnoDB: Bitmap bits %lu\n", - (ulong) old_bits); - - fputs("InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - } + if (update->n_fields == 0) { + /* The records only differ in the delete-mark. + Clear the delete-mark, like we did before + Bug #56680 was fixed. */ + btr_cur_del_unmark_for_ibuf(rec, mtr); +updated_in_place: + mem_heap_free(heap); + return; } + + /* Copy the info bits. Clear the delete-mark. */ + update->info_bits = rec_get_info_bits(rec, page_is_comp(page)); + update->info_bits &= ~REC_INFO_DELETED_FLAG; + + /* We cannot invoke btr_cur_optimistic_update() here, + because we do not have a btr_cur_t or que_thr_t, + as the insert buffer merge occurs at a very low level. */ + if (!row_upd_changes_field_size_or_external(index, offsets, + update)) { + /* This is the easy case. Do something similar + to btr_cur_update_in_place(). */ + row_upd_rec_in_place(rec, offsets, update); + goto updated_in_place; + } + + /* A collation may identify values that differ in + storage length. + Some examples (1 or 2 bytes): + utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I + utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S + utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS + + latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S + + Examples of a character (3-byte UTF-8 sequence) + identified with 2 or 4 characters (1-byte UTF-8 sequences): + + utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO + utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN + */ + + /* Delete the different-length record, and insert the + buffered one. */ + + lock_rec_store_on_page_infimum(page, rec); + page_cur_delete_rec(&page_cur, index, offsets, mtr); + page_cur_move_to_prev(&page_cur); + mem_heap_free(heap); + + ibuf_insert_to_index_page_low(entry, page, index, mtr, + &page_cur); + lock_rec_restore_from_page_infimum(rec, page); + } else { + ibuf_insert_to_index_page_low(entry, page, index, mtr, + &page_cur); } } diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 322848509f4..e52c22b8f57 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -38,6 +38,20 @@ buf_flush_init_for_writing( dulint newest_lsn, /* in: newest modification lsn to the page */ ulint space, /* in: space id */ ulint page_no); /* in: page number */ +# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/********************************************************************** +Writes a flushable page asynchronously from the buffer pool to a file. +NOTE: buf_pool_mutex and block->mutex must be held upon entering this +function, and they will be released by this function after flushing. +This is loosely based on buf_flush_batch() and buf_flush_try_page(). */ + +ibool +buf_flush_page_try( +/*===============*/ + /* out: TRUE if flushed and + mutexes released */ + buf_block_t* block); /*!< in/out: buffer control block */ +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ /*********************************************************************** This utility flushes dirty blocks from the end of the LRU list or flush_list. NOTE 1: in the case of an LRU flush the calling thread may own latches to diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index 6d26fd4d3b2..777e55a350d 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -66,6 +66,14 @@ buf_LRU_get_recent_limit(void); /*==========================*/ /* out: the limit; zero if could not determine it */ /********************************************************************** +Try to put a block from the LRU list to the free list. */ + +ibool +buf_LRU_free_block( +/*===============*/ + /* out: TRUE if freed */ + buf_block_t* block); /* in/out: block to be freed */ +/********************************************************************** Look for a replaceable block from the end of the LRU list and put it to the free list if found. */ diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h index 77fefe2020b..d6d7a918b62 100644 --- a/storage/innobase/include/ibuf0ibuf.h +++ b/storage/innobase/include/ibuf0ibuf.h @@ -18,6 +18,11 @@ Created 7/19/1997 Heikki Tuuri #include "ibuf0types.h" #include "fsp0fsp.h" +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/* Flag to control insert buffer debugging. */ +extern uint ibuf_debug; +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + extern ibuf_t* ibuf; /********************************************************************** diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index abc204bb583..58762fc3111 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -19,6 +19,9 @@ if and only if the record is the first user record on a non-leaf B-tree page that is the leftmost page on its level (PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */ #define REC_INFO_MIN_REC_FLAG 0x10UL +/* The deleted flag in info bits */ +#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the + record has been delete marked */ /* Number of extra bytes in an old-style record, in addition to the data and the offsets */ diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic index d91fb4c4391..df66bb13aeb 100644 --- a/storage/innobase/include/rem0rec.ic +++ b/storage/innobase/include/rem0rec.ic @@ -98,9 +98,6 @@ and the shift needed to obtain each bit-field of the record. */ #define REC_INFO_BITS_MASK 0xF0UL #define REC_INFO_BITS_SHIFT 0 -/* The deleted flag in info bits */ -#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the - record has been delete marked */ /* The following masks are used to filter the SQL null bit from one-byte and two-byte offsets */ diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 488177791a4..52b13838cc7 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -485,6 +485,10 @@ struct mysql_row_templ_struct { Innobase record in the current index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ + ulint clust_rec_field_no; /* field number of the column in an + Innobase record in the clustered index; + not defined if template_type is + ROW_MYSQL_WHOLE_ROW */ ulint mysql_col_offset; /* offset of the column in the MySQL row format */ ulint mysql_col_len; /* length of the column in the MySQL diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h index efbc6d6facf..034b1dafb17 100644 --- a/storage/innobase/include/row0upd.h +++ b/storage/innobase/include/row0upd.h @@ -129,9 +129,11 @@ row_upd_changes_field_size_or_external( const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update);/* in: update vector */ /*************************************************************** -Replaces the new column values stored in the update vector to the record -given. No field size changes are allowed. This function is used only for -a clustered index */ +Replaces the new column values stored in the update vector to the +record given. No field size changes are allowed. This function is +usually invoked on a clustered index. The only use case for a +secondary index is row_ins_sec_index_entry_by_modify() or its +counterpart in ibuf_insert_to_index_page(). */ void row_upd_rec_in_place( diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index a0f54f7288e..99738115cc7 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -400,7 +400,7 @@ row_mysql_convert_row_to_innobase( row is used, as row may contain pointers to this record! */ { - mysql_row_templ_t* templ; + const mysql_row_templ_t*templ; dfield_t* dfield; ulint i; diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index ad15d0798a2..e03d3d79768 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -2601,20 +2601,21 @@ row_sel_store_mysql_rec( row_prebuilt_t* prebuilt, /* in: prebuilt struct */ rec_t* rec, /* in: Innobase record in the index which was described in prebuilt's - template */ + template, or in the clustered index; + must be protected by a page latch */ + ibool rec_clust, /* in: TRUE if rec is in the clustered + index instead of prebuilt->index */ const ulint* offsets) /* in: array returned by - rec_get_offsets() */ + rec_get_offsets(rec) */ { - mysql_row_templ_t* templ; mem_heap_t* extern_field_heap = NULL; mem_heap_t* heap; - byte* data; - ulint len; ulint i; ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->default_rec); ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { mem_heap_free(prebuilt->blob_heap); @@ -2623,10 +2624,15 @@ row_sel_store_mysql_rec( for (i = 0; i < prebuilt->n_template; i++) { - templ = prebuilt->mysql_template + i; + const mysql_row_templ_t*templ = prebuilt->mysql_template + i; + byte* data; + ulint len; + ulint field_no; - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, - templ->rec_field_no))) { + field_no = rec_clust + ? templ->clust_rec_field_no : templ->rec_field_no; + + if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { /* Copy an externally stored field to the temporary heap */ @@ -2652,15 +2658,13 @@ row_sel_store_mysql_rec( causes an assert */ data = btr_rec_copy_externally_stored_field( - rec, offsets, templ->rec_field_no, - &len, heap); + rec, offsets, field_no, &len, heap); ut_a(len != UNIV_SQL_NULL); } else { /* Field is stored in the row. */ - data = rec_get_nth_field(rec, offsets, - templ->rec_field_no, &len); + data = rec_get_nth_field(rec, offsets, field_no, &len); if (UNIV_UNLIKELY(templ->type == DATA_BLOB) && len != UNIV_SQL_NULL) { @@ -3019,7 +3023,7 @@ row_sel_pop_cached_row_for_mysql( row_prebuilt_t* prebuilt) /* in: prebuilt struct */ { ulint i; - mysql_row_templ_t* templ; + const mysql_row_templ_t*templ; byte* cached_rec; ut_ad(prebuilt->n_fetch_cached > 0); ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len); @@ -3075,14 +3079,19 @@ void row_sel_push_cache_row_for_mysql( /*=============================*/ row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - rec_t* rec, /* in: record to push */ - const ulint* offsets) /* in: rec_get_offsets() */ + rec_t* rec, /* in: Innobase record in the index + which was described in prebuilt's + template, or in the clustered index */ + ibool rec_clust, /* in: TRUE if rec is in the clustered + index instead of prebuilt->index */ + const ulint* offsets) /* in: rec_get_offsets(rec) */ { byte* buf; ulint i; ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); ut_a(!prebuilt->templ_contains_blob); if (prebuilt->fetch_cache[0] == NULL) { @@ -3111,7 +3120,7 @@ row_sel_push_cache_row_for_mysql( if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( prebuilt->fetch_cache[ prebuilt->n_fetch_cached], - prebuilt, rec, offsets))) { + prebuilt, rec, rec_clust, offsets))) { ut_error; } @@ -3500,7 +3509,8 @@ row_search_for_mysql( rec, offsets)); #endif if (!row_sel_store_mysql_rec(buf, prebuilt, - rec, offsets)) { + rec, FALSE, + offsets)) { err = DB_TOO_BIG_RECORD; /* We let the main loop to do the @@ -4233,19 +4243,8 @@ requires_clust_rec: goto next_rec; } - if (prebuilt->need_to_access_clustered) { - - result_rec = clust_rec; - - ut_ad(rec_offs_validate(result_rec, clust_index, - offsets)); - } else { - /* We used 'offsets' for the clust rec, recalculate - them for 'rec' */ - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - result_rec = rec; - } + result_rec = clust_rec; + ut_ad(rec_offs_validate(result_rec, clust_index, offsets)); } else { result_rec = rec; } @@ -4256,6 +4255,7 @@ requires_clust_rec: ut_ad(rec_offs_validate(result_rec, result_rec != rec ? clust_index : index, offsets)); + ut_ad(!rec_get_deleted_flag(result_rec, comp)); if ((match_mode == ROW_SEL_EXACT || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD) @@ -4276,7 +4276,7 @@ requires_clust_rec: cursor. */ row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - offsets); + result_rec != rec, offsets); if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { goto got_row; @@ -4284,15 +4284,31 @@ requires_clust_rec: goto next_rec; } else { - if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) { + if (UNIV_UNLIKELY + (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) { + /* CHECK TABLE: fetch the row */ + + if (result_rec != rec + && !prebuilt->need_to_access_clustered) { + /* We used 'offsets' for the clust + rec, recalculate them for 'rec' */ + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, + &heap); + result_rec = rec; + } + memcpy(buf + 4, result_rec - rec_offs_extra_size(offsets), rec_offs_size(offsets)); mach_write_to_4(buf, rec_offs_extra_size(offsets) + 4); } else { - if (!row_sel_store_mysql_rec(buf, prebuilt, - result_rec, offsets)) { + /* Returning a row to MySQL */ + + if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, + result_rec != rec, + offsets)) { err = DB_TOO_BIG_RECORD; goto lock_wait_or_error; diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index 034b7010410..0790cfe02e2 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -430,9 +430,11 @@ row_upd_changes_field_size_or_external( } /*************************************************************** -Replaces the new column values stored in the update vector to the record -given. No field size changes are allowed. This function is used only for -a clustered index */ +Replaces the new column values stored in the update vector to the +record given. No field size changes are allowed. This function is +usually invoked on a clustered index. The only use case for a +secondary index is row_ins_sec_index_entry_by_modify() or its +counterpart in ibuf_insert_to_index_page(). */ void row_upd_rec_in_place(