From 590fdf7792df16bf7866574432d19e48e49254c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 21 Dec 2010 11:39:19 +0200 Subject: [PATCH 1/2] Bug#58912 InnoDB unnecessarily avoids update-in-place on column prefix indexes row_upd_changes_ord_field_binary(): Do not return TRUE if the update vector changes a column that is covered by a prefix index, but does not change the column prefix. Add the row_ext_t parameter for determining whether the prefixes of externally stored columns match. dfield_datas_are_binary_equal(): Add the parameter len, for comparing column prefixes when len > 0. innodb.test: Add a test case where the patch of Bug #55284 failed without this fix. rb:537 approved by Jimmy Yang --- .../suite/innodb_plugin/r/innodb.result | 8 +- mysql-test/suite/innodb_plugin/t/innodb.test | 12 +++ storage/innodb_plugin/ChangeLog | 10 ++ storage/innodb_plugin/btr/btr0cur.c | 3 +- storage/innodb_plugin/include/data0data.h | 11 ++- storage/innodb_plugin/include/data0data.ic | 22 +++-- storage/innodb_plugin/include/row0upd.h | 5 +- storage/innodb_plugin/row/row0purge.c | 2 +- storage/innodb_plugin/row/row0umod.c | 4 +- storage/innodb_plugin/row/row0upd.c | 93 +++++++++++++------ 10 files changed, 128 insertions(+), 42 deletions(-) diff --git a/mysql-test/suite/innodb_plugin/r/innodb.result b/mysql-test/suite/innodb_plugin/r/innodb.result index 75a7023f9d0..d3f7a0b9fff 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb.result +++ b/mysql-test/suite/innodb_plugin/r/innodb.result @@ -1,5 +1,8 @@ drop table if exists t1,t2,t3,t4; drop database if exists mysqltest; +CREATE TABLE bug58912 (a BLOB, b TEXT, PRIMARY KEY(a(1))) ENGINE=InnoDB; +INSERT INTO bug58912 VALUES(REPEAT('a',8000),REPEAT('b',8000)); +UPDATE bug58912 SET a=REPEAT('a',7999); create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb; insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David'), (2, 'Erik'), (3, 'Sasha'), (3, 'Jeremy'), (4, 'Matt'); select id, code, name from t1 order by id; @@ -1676,10 +1679,10 @@ variable_value - @innodb_rows_deleted_orig 71 SELECT variable_value - @innodb_rows_inserted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted'; variable_value - @innodb_rows_inserted_orig -1066 +1067 SELECT variable_value - @innodb_rows_updated_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated'; variable_value - @innodb_rows_updated_orig -865 +866 SELECT variable_value - @innodb_row_lock_waits_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_waits'; variable_value - @innodb_row_lock_waits_orig 0 @@ -3239,3 +3242,4 @@ Variable_name Value Handler_update 1 Variable_name Value Handler_delete 1 +DROP TABLE bug58912; diff --git a/mysql-test/suite/innodb_plugin/t/innodb.test b/mysql-test/suite/innodb_plugin/t/innodb.test index 60ba7d1e3bf..cfc2277d501 100644 --- a/mysql-test/suite/innodb_plugin/t/innodb.test +++ b/mysql-test/suite/innodb_plugin/t/innodb.test @@ -43,6 +43,15 @@ drop table if exists t1,t2,t3,t4; drop database if exists mysqltest; --enable_warnings +# Bug#58912 InnoDB unnecessarily avoids update-in-place on column prefixes +CREATE TABLE bug58912 (a BLOB, b TEXT, PRIMARY KEY(a(1))) ENGINE=InnoDB; +INSERT INTO bug58912 VALUES(REPEAT('a',8000),REPEAT('b',8000)); +UPDATE bug58912 SET a=REPEAT('a',7999); +# The above statements used to trigger a failure during purge when +# Bug#55284 was fixed while Bug#58912 was not. Defer the DROP TABLE, +# so that purge gets a chance to run (and a double free of the +# off-page column can be detected, if one is to occur.) + # # Small basic test with ignore # @@ -2541,6 +2550,9 @@ SET GLOBAL innodb_thread_concurrency = @innodb_thread_concurrency_orig; -- enable_query_log +# Clean up after the Bug#55284/Bug#58912 test case. +DROP TABLE bug58912; + ####################################################################### # # # Please, DO NOT TOUCH this file as well as the innodb.result file. # diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 347dc9a5183..13e2836dc98 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,8 +1,18 @@ +2010-12-21 The InnoDB Team + + * include/data0data.h, include/data0data.ic, include/row0upd.h, + btr/btr0cur.c, row/row0purge.c, row/row0umod.c, row/row0upd.c, + innodb.result, innodb.test: + Fix Bug#58912 InnoDB unnecessarily avoids update-in-place + on column prefix indexes + 2010-12-09 The InnoDB Team + * buf/buf0lru.c: Fix Bug#57600 output of I/O sum[%lu] can go negative 2010-11-11 The InnoDB Team + * thr/thr0loc.c, trx/trx0i_s.c: Fix Bug#57802 Empty ASSERTION parameter passed to the HASH_SEARCH macro diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index f9f0d156f52..2a26d8ffb33 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -1756,7 +1756,8 @@ btr_cur_update_in_place( NOT call it if index is secondary */ if (!dict_index_is_clust(index) - || row_upd_changes_ord_field_binary(NULL, index, update)) { + || row_upd_changes_ord_field_binary(NULL, NULL, + index, update)) { /* Remove possible hash index pointer to this record */ btr_search_update_hash_on_delete(cursor); diff --git a/storage/innodb_plugin/include/data0data.h b/storage/innodb_plugin/include/data0data.h index f9fce3f3657..cab8d790ac1 100644 --- a/storage/innodb_plugin/include/data0data.h +++ b/storage/innodb_plugin/include/data0data.h @@ -154,14 +154,19 @@ dfield_dup( dfield_t* field, /*!< in/out: data field */ mem_heap_t* heap); /*!< in: memory heap where allocated */ /*********************************************************************//** -Tests if data length and content is equal for two dfields. -@return TRUE if equal */ +Tests if two data fields are equal. +If len==0, tests the data length and content for equality. +If len>0, tests the first len bytes of the content for equality. +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( /*==========================*/ const dfield_t* field1, /*!< in: field */ - const dfield_t* field2);/*!< in: field */ + const dfield_t* field2, /*!< in: field */ + ulint len) /*!< in: maximum prefix to compare, + or 0 to compare the whole field length */ + __attribute__((nonnull, warn_unused_result)); /*********************************************************************//** Tests if dfield data length and content is equal to the given. @return TRUE if equal */ diff --git a/storage/innodb_plugin/include/data0data.ic b/storage/innodb_plugin/include/data0data.ic index da79aa33702..74e0f7d09a0 100644 --- a/storage/innodb_plugin/include/data0data.ic +++ b/storage/innodb_plugin/include/data0data.ic @@ -229,20 +229,30 @@ dfield_dup( } /*********************************************************************//** -Tests if data length and content is equal for two dfields. -@return TRUE if equal */ +Tests if two data fields are equal. +If len==0, tests the data length and content for equality. +If len>0, tests the first len bytes of the content for equality. +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( /*==========================*/ const dfield_t* field1, /*!< in: field */ - const dfield_t* field2) /*!< in: field */ + const dfield_t* field2, /*!< in: field */ + ulint len) /*!< in: maximum prefix to compare, + or 0 to compare the whole field length */ { - ulint len; + ulint len2 = len; - len = field1->len; + if (field1->len == UNIV_SQL_NULL || len == 0 || field1->len < len) { + len = field1->len; + } - return(len == field2->len + if (field2->len == UNIV_SQL_NULL || len2 == 0 || field2->len < len2) { + len2 = field2->len; + } + + return(len == len2 && (len == UNIV_SQL_NULL || !memcmp(field1->data, field2->data, len))); } diff --git a/storage/innodb_plugin/include/row0upd.h b/storage/innodb_plugin/include/row0upd.h index ea14cd64213..58d7ad15ecc 100644 --- a/storage/innodb_plugin/include/row0upd.h +++ b/storage/innodb_plugin/include/row0upd.h @@ -286,10 +286,13 @@ row_upd_changes_ord_field_binary( row and the data values in update are not known when this function is called, e.g., at compile time */ + const row_ext_t*ext, /*!< NULL, or prefixes of the externally + stored columns in the old row */ dict_index_t* index, /*!< in: index of the record */ - const upd_t* update);/*!< in: update vector for the row; NOTE: the + const upd_t* update) /*!< in: update vector for the row; NOTE: the field numbers in this MUST be clustered index positions! */ + __attribute__((nonnull(3,4), warn_unused_result)); /***********************************************************//** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering diff --git a/storage/innodb_plugin/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c index 31b255cf2d4..8bf2ae0f458 100644 --- a/storage/innodb_plugin/row/row0purge.c +++ b/storage/innodb_plugin/row/row0purge.c @@ -413,7 +413,7 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(NULL, node->index, + if (row_upd_changes_ord_field_binary(NULL, NULL, node->index, node->update)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c index 5998dadd16d..49c51a75f18 100644 --- a/storage/innodb_plugin/row/row0umod.c +++ b/storage/innodb_plugin/row/row0umod.c @@ -668,8 +668,8 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { + if (row_upd_changes_ord_field_binary( + node->row, node->ext, node->index, node->update)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 0dc550b93f5..20f31e15514 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1198,20 +1198,21 @@ row_upd_changes_ord_field_binary( row and the data values in update are not known when this function is called, e.g., at compile time */ + const row_ext_t*ext, /*!< NULL, or prefixes of the externally + stored columns in the old row */ dict_index_t* index, /*!< in: index of the record */ const upd_t* update) /*!< in: update vector for the row; NOTE: the field numbers in this MUST be clustered index positions! */ { - ulint n_unique; - ulint n_upd_fields; - ulint i, j; - dict_index_t* clust_index; + ulint n_unique; + ulint i; + const dict_index_t* clust_index; - ut_ad(update && index); + ut_ad(update); + ut_ad(index); n_unique = dict_index_get_n_unique(index); - n_upd_fields = upd_get_n_fields(update); clust_index = dict_table_get_first_index(index->table); @@ -1219,33 +1220,72 @@ row_upd_changes_ord_field_binary( const dict_field_t* ind_field; const dict_col_t* col; - ulint col_pos; ulint col_no; + const upd_field_t* upd_field; + const dfield_t* dfield; + dfield_t dfield_ext; + ulint dfield_len; + const byte* buf; ind_field = dict_index_get_nth_field(index, i); col = dict_field_get_col(ind_field); - col_pos = dict_col_get_clust_pos(col, clust_index); col_no = dict_col_get_no(col); - for (j = 0; j < n_upd_fields; j++) { + upd_field = upd_get_field_by_field_no( + update, dict_col_get_clust_pos(col, clust_index)); - const upd_field_t* upd_field - = upd_get_nth_field(update, j); + if (upd_field == NULL) { + continue; + } - /* Note that if the index field is a column prefix - then it may be that row does not contain an externally - stored part of the column value, and we cannot compare - the datas */ + if (row == NULL) { + ut_ad(ext == NULL); + return(TRUE); + } - if (col_pos == upd_field->field_no - && (row == NULL - || ind_field->prefix_len > 0 - || !dfield_datas_are_binary_equal( - dtuple_get_nth_field(row, col_no), - &(upd_field->new_val)))) { + dfield = dtuple_get_nth_field(row, col_no); - return(TRUE); + /* This treatment of column prefix indexes is loosely + based on row_build_index_entry(). */ + + if (UNIV_LIKELY(ind_field->prefix_len == 0) + || dfield_is_null(dfield)) { + /* do nothing special */ + } else if (UNIV_LIKELY_NULL(ext)) { + /* See if the column is stored externally. */ + buf = row_ext_lookup(ext, col_no, &dfield_len); + + ut_ad(col->ord_part); + + if (UNIV_LIKELY_NULL(buf)) { + if (UNIV_UNLIKELY(buf == field_ref_zero)) { + /* This should never happen, but + we try to fail safe here. */ + ut_ad(0); + return(TRUE); + } + + goto copy_dfield; } + } else if (dfield_is_ext(dfield)) { + dfield_len = dfield_get_len(dfield); + ut_a(dfield_len > BTR_EXTERN_FIELD_REF_SIZE); + dfield_len -= BTR_EXTERN_FIELD_REF_SIZE; + ut_a(dict_index_is_clust(index) + || ind_field->prefix_len <= dfield_len); + buf = dfield_get_data(dfield); +copy_dfield: + ut_a(dfield_len > 0); + dfield_copy(&dfield_ext, dfield); + dfield_set_data(&dfield_ext, buf, dfield_len); + dfield = &dfield_ext; + } + + if (!dfield_datas_are_binary_equal( + dfield, &upd_field->new_val, + ind_field->prefix_len)) { + + return(TRUE); } } @@ -1329,7 +1369,7 @@ row_upd_changes_first_fields_binary( if (col_pos == upd_field->field_no && !dfield_datas_are_binary_equal( dtuple_get_nth_field(entry, i), - &(upd_field->new_val))) { + &upd_field->new_val, 0)) { return(TRUE); } @@ -1568,8 +1608,8 @@ row_upd_sec_step( ut_ad(!dict_index_is_clust(node->index)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { + || row_upd_changes_ord_field_binary(node->row, node->ext, + node->index, node->update)) { return(row_upd_sec_index_entry(node, thr)); } @@ -1973,7 +2013,8 @@ exit_func: row_upd_store_row(node); - if (row_upd_changes_ord_field_binary(node->row, index, node->update)) { + if (row_upd_changes_ord_field_binary(node->row, node->ext, index, + node->update)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform From 721a890e480c7a704c94497167948ad797043517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 21 Dec 2010 13:27:22 +0200 Subject: [PATCH 2/2] Bug #55284 Double BLOB free due to lock wait while updating PRIMARY KEY This bug fix requires that Bug #58912 be fixed as well (bzr revision id marko.makela@oracle.com-20101221093919-mcmmgd4zpse9567d). Otherwise, another double BLOB free could occur when InnoDB would try to perform an update-in-place as delete-and-insert-by-update-in-place. row_upd_clust_rec_by_insert(): Do not disown the externally stored columns from the old record (btr_cur_mark_extern_inherited_fields()) until after checking the foreign key constraints and successfully inserting the updated record. If a lock wait timeout occurs between the delete-marking of the old record and the insertion of the updated record, mark the columns inherited before retrying the insert. Distinguish the state UPD_NODE_INSERT_BLOB from UPD_NODE_INSERT_CLUSTERED. btr_cur_del_mark_set_clust_rec(): Replace the cursor with block,rec,index,offsets so that the offsets need not be recalculated. Assert that rec is on a clustered index leaf page. btr_cur_disown_inherited_fields(): Renamed from btr_cur_mark_extern_inherited_fields(). Use upd_get_field_by_field_no(). Assert that there are externally stored columns. Assert that a mini-transaction is passed. Remove the return status. (The only caller, row_upd_clust_rec_by_insert(), will have determined that some fields have changed ownership.) btr_cur_mark_dtuple_inherited_extern(): Rename to row_upd_clust_rec_by_insert_inherit_func() and declare as static. Add the debug parameters rec, offsets. When rec is given, assert that the off-page columns match those in the inesrt tuple and that the off-page columns are owned by the record. Assert that the non-updated off-page columns in the insert tuple are owned, and mark them inherited. row_upd_clust_rec_by_insert_inherit(): A wrapper macro for row_upd_clust_rec_by_insert_inherit_func(). row_undo_mod_upd_exist_sec(): Adjust a comment about row_upd_clust_rec_by_insert(). rb:508 approved by Jimmy Yang --- storage/innodb_plugin/ChangeLog | 6 + storage/innodb_plugin/btr/btr0cur.c | 153 +++------------ storage/innodb_plugin/include/btr0cur.h | 42 ++-- storage/innodb_plugin/include/row0upd.h | 9 +- storage/innodb_plugin/row/row0umod.c | 9 +- storage/innodb_plugin/row/row0upd.c | 246 ++++++++++++++++++------ 6 files changed, 240 insertions(+), 225 deletions(-) diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 13e2836dc98..ce470a0c027 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2010-12-21 The InnoDB Team + * include/btr0cur.h, include/row0upd.h, btr/btr0cur.c, + row/row0umod.c, row/row0upd.c: + Fix Bug#55284 Double free of off-page columns due to lock wait + while updating PRIMARY KEY + 2010-12-21 The InnoDB Team * include/data0data.h, include/data0data.ic, include/row0upd.h, diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 2a26d8ffb33..c57255a25ae 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -2509,27 +2509,24 @@ ulint btr_cur_del_mark_set_clust_rec( /*===========================*/ ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor */ + buf_block_t* block, /*!< in/out: buffer block of the record */ + rec_t* rec, /*!< in/out: record */ + dict_index_t* index, /*!< in: clustered index of the record */ + const ulint* offsets,/*!< in: rec_get_offsets(rec) */ ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { - dict_index_t* index; - buf_block_t* block; roll_ptr_t roll_ptr; ulint err; - rec_t* rec; page_zip_des_t* page_zip; trx_t* trx; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - rec = btr_cur_get_rec(cursor); - index = cursor->index; + ut_ad(dict_index_is_clust(index)); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); + ut_ad(buf_block_get_frame(block) == page_align(rec)); + ut_ad(page_is_leaf(page_align(rec))); #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { @@ -2541,13 +2538,12 @@ btr_cur_del_mark_set_clust_rec( ut_ad(dict_index_is_clust(index)); ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - err = lock_clust_rec_modify_check_and_lock(flags, - btr_cur_get_block(cursor), + err = lock_clust_rec_modify_check_and_lock(flags, block, rec, index, offsets, thr); if (err != DB_SUCCESS) { - goto func_exit; + return(err); } err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, @@ -2555,11 +2551,9 @@ btr_cur_del_mark_set_clust_rec( &roll_ptr); if (err != DB_SUCCESS) { - goto func_exit; + return(err); } - block = btr_cur_get_block(cursor); - if (block->is_hashed) { rw_lock_x_lock(&btr_search_latch); } @@ -2582,10 +2576,6 @@ btr_cur_del_mark_set_clust_rec( btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, roll_ptr, mtr); -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } return(err); } @@ -3477,108 +3467,36 @@ btr_cur_set_ownership_of_extern_field( } /*******************************************************************//** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the +Marks non-updated off-page fields as disowned by this record. The ownership +must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed -to free the field. -@return TRUE if BLOB ownership was transferred */ +to free the field. */ UNIV_INTERN -ibool -btr_cur_mark_extern_inherited_fields( -/*=================================*/ +void +btr_cur_disown_inherited_fields( +/*============================*/ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed part will be updated, or NULL */ rec_t* rec, /*!< in/out: record in a clustered index */ dict_index_t* index, /*!< in: index of the page */ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ const upd_t* update, /*!< in: update vector */ - mtr_t* mtr) /*!< in: mtr, or NULL if not logged */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint n; - ulint j; ulint i; - ibool change_ownership = FALSE; - ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + ut_ad(rec_offs_any_extern(offsets)); + ut_ad(mtr); - if (!rec_offs_any_extern(offsets)) { - - return(FALSE); - } - - n = rec_offs_n_fields(offsets); - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - /* Check it is not in updated fields */ - - if (update) { - for (j = 0; j < upd_get_n_fields(update); - j++) { - if (upd_get_nth_field(update, j) - ->field_no == i) { - - goto updated; - } - } - } - + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (rec_offs_nth_extern(offsets, i) + && !upd_get_field_by_field_no(update, i)) { btr_cur_set_ownership_of_extern_field( page_zip, rec, index, offsets, i, FALSE, mtr); - - change_ownership = TRUE; -updated: - ; } } - - return(change_ownership); -} - -/*******************************************************************//** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ -UNIV_INTERN -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted to clustered index */ - const upd_t* update) /*!< in: update vector */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - dfield_t* dfield = dtuple_get_nth_field(entry, i); - byte* data; - ulint len; - ulint j; - - if (!dfield_is_ext(dfield)) { - continue; - } - - /* Check if it is in updated fields */ - - for (j = 0; j < upd_get_n_fields(update); j++) { - if (upd_get_nth_field(update, j)->field_no == i) { - - goto is_updated; - } - } - - data = dfield_get_data(dfield); - len = dfield_get_len(dfield); - data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN] - |= BTR_EXTERN_INHERITED_FLAG; - -is_updated: - ; - } } /*******************************************************************//** @@ -3616,29 +3534,6 @@ btr_cur_unmark_extern_fields( } } -/*******************************************************************//** -Marks all extern fields in a dtuple as owned by the record. */ -UNIV_INTERN -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry) /*!< in/out: clustered index entry */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - dfield_t* dfield = dtuple_get_nth_field(entry, i); - - if (dfield_is_ext(dfield)) { - byte* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN] - &= ~BTR_EXTERN_OWNER_FLAG; - } - } -} - /*******************************************************************//** Flags the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index 1e1dc0f580a..b477ad0320a 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -332,10 +332,14 @@ ulint btr_cur_del_mark_set_clust_rec( /*===========================*/ ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor */ + buf_block_t* block, /*!< in/out: buffer block of the record */ + rec_t* rec, /*!< in/out: record */ + dict_index_t* index, /*!< in: clustered index of the record */ + const ulint* offsets,/*!< in: rec_get_offsets(rec) */ ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + __attribute__((nonnull)); /***********************************************************//** Sets a secondary index record delete mark to TRUE or FALSE. @return DB_SUCCESS, DB_LOCK_WAIT, or error number */ @@ -481,40 +485,22 @@ btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index); /*!< in: index */ /*******************************************************************//** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the +Marks non-updated off-page fields as disowned by this record. The ownership +must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed -to free the field. -@return TRUE if BLOB ownership was transferred */ +to free the field. */ UNIV_INTERN -ibool -btr_cur_mark_extern_inherited_fields( -/*=================================*/ +void +btr_cur_disown_inherited_fields( +/*============================*/ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed part will be updated, or NULL */ rec_t* rec, /*!< in/out: record in a clustered index */ dict_index_t* index, /*!< in: index of the page */ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ const upd_t* update, /*!< in: update vector */ - mtr_t* mtr); /*!< in: mtr, or NULL if not logged */ -/*******************************************************************//** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ -UNIV_INTERN -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted to clustered index */ - const upd_t* update); /*!< in: update vector */ -/*******************************************************************//** -Marks all extern fields in a dtuple as owned by the record. */ -UNIV_INTERN -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry); /*!< in/out: clustered index entry */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + __attribute__((nonnull(2,3,4,5,6))); /*******************************************************************//** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. diff --git a/storage/innodb_plugin/include/row0upd.h b/storage/innodb_plugin/include/row0upd.h index 58d7ad15ecc..b61e6b6dca1 100644 --- a/storage/innodb_plugin/include/row0upd.h +++ b/storage/innodb_plugin/include/row0upd.h @@ -465,11 +465,16 @@ struct upd_node_struct{ #define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be inserted, old record is already delete marked */ -#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered +#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be + inserted, old record is already + delete-marked; non-updated BLOBs + should be inherited by the new record + and disowned by the old record */ +#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered index record was changed, or this is a delete operation: should update all the secondary index records */ -#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be +#define UPD_NODE_UPDATE_SOME_SEC 6 /* secondary index entries should be looked at and updated if an ordering field changed */ diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c index 49c51a75f18..562f8093c38 100644 --- a/storage/innodb_plugin/row/row0umod.c +++ b/storage/innodb_plugin/row/row0umod.c @@ -676,11 +676,10 @@ row_undo_mod_upd_exist_sec( index, heap); if (UNIV_UNLIKELY(!entry)) { /* The server must have crashed in - row_upd_clust_rec_by_insert(), in - row_ins_index_entry_low() before - btr_store_big_rec_extern_fields() - has written the externally stored columns - (BLOBs) of the new clustered index entry. */ + row_upd_clust_rec_by_insert() before + the updated externally stored columns (BLOBs) + of the new clustered index entry were + written. */ /* The table must be in DYNAMIC or COMPRESSED format. REDUNDANT and COMPACT formats diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 20f31e15514..248f270bd9f 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1616,6 +1616,91 @@ row_upd_sec_step( return(DB_SUCCESS); } +#ifdef UNIV_DEBUG +# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update) +#else /* UNIV_DEBUG */ +# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(entry,update) +#endif /* UNIV_DEBUG */ +/*******************************************************************//** +Mark non-updated off-page columns inherited when the primary key is +updated. We must mark them as inherited in entry, so that they are not +freed in a rollback. A limited version of this function used to be +called btr_cur_mark_dtuple_inherited_extern(). +@return TRUE if any columns were inherited */ +static __attribute__((warn_unused_result)) +ibool +row_upd_clust_rec_by_insert_inherit_func( +/*=====================================*/ +#ifdef UNIV_DEBUG + const rec_t* rec, /*!< in: old record, or NULL */ + const ulint* offsets,/*!< in: rec_get_offsets(rec), or NULL */ +#endif /* UNIV_DEBUG */ + dtuple_t* entry, /*!< in/out: updated entry to be + inserted into the clustered index */ + const upd_t* update) /*!< in: update vector */ +{ + ibool inherit = FALSE; + ulint i; + + ut_ad(!rec == !offsets); + ut_ad(!rec || rec_offs_any_extern(offsets)); + + for (i = 0; i < dtuple_get_n_fields(entry); i++) { + dfield_t* dfield = dtuple_get_nth_field(entry, i); + byte* data; + ulint len; + + ut_ad(!offsets + || !rec_offs_nth_extern(offsets, i) + == !dfield_is_ext(dfield) + || upd_get_field_by_field_no(update, i)); + if (!dfield_is_ext(dfield) + || upd_get_field_by_field_no(update, i)) { + continue; + } + +#ifdef UNIV_DEBUG + if (UNIV_LIKELY(rec != NULL)) { + const byte* rec_data + = rec_get_nth_field(rec, offsets, i, &len); + ut_ad(len == dfield_get_len(dfield)); + ut_ad(len != UNIV_SQL_NULL); + ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); + + rec_data += len - BTR_EXTERN_FIELD_REF_SIZE; + + /* The pointer must not be zero. */ + ut_ad(memcmp(rec_data, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* The BLOB must be owned. */ + ut_ad(!(rec_data[BTR_EXTERN_LEN] + & BTR_EXTERN_OWNER_FLAG)); + } +#endif /* UNIV_DEBUG */ + + len = dfield_get_len(dfield); + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + data = dfield_get_data(dfield) + len + - BTR_EXTERN_FIELD_REF_SIZE; + /* The pointer must not be zero. */ + ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); + /* The BLOB must be owned. */ + ut_a(!(data[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + + data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG; + /* The BTR_EXTERN_INHERITED_FLAG only matters in + rollback. Purge will always free the extern fields of + a delete-marked row. */ + + inherit = TRUE; + } + + return(inherit); +} + /***********************************************************//** Marks the clustered index record deleted and inserts the updated version of the record to the index. This function should be used when the ordering @@ -1634,14 +1719,16 @@ row_upd_clust_rec_by_insert( a foreign key constraint */ mtr_t* mtr) /*!< in/out: mtr; gets committed here */ { - mem_heap_t* heap = NULL; + mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* btr_cur; trx_t* trx; dict_table_t* table; dtuple_t* entry; ulint err; - ibool change_ownership = FALSE; + ibool change_ownership = FALSE; + rec_t* rec; + ulint* offsets = NULL; ut_ad(node); ut_ad(dict_index_is_clust(index)); @@ -1651,53 +1738,7 @@ row_upd_clust_rec_by_insert( pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); - if (node->state != UPD_NODE_INSERT_CLUSTERED) { - rec_t* rec; - dict_index_t* index; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets; - rec_offs_init(offsets_); - - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - return(err); - } - - /* Mark as not-owned the externally stored fields which the new - row inherits from the delete marked record: purge should not - free those externally stored fields even if the delete marked - record is removed from the index tree, or updated. */ - - rec = btr_cur_get_rec(btr_cur); - index = dict_table_get_first_index(table); - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - change_ownership = btr_cur_mark_extern_inherited_fields( - btr_cur_get_page_zip(btr_cur), rec, index, offsets, - node->update, mtr); - if (check_ref) { - /* NOTE that the following call loses - the position of pcur ! */ - err = row_upd_check_references_constraints( - node, pcur, table, index, offsets, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - } - } - - mtr_commit(mtr); - - if (!heap) { - heap = mem_heap_create(500); - } - node->state = UPD_NODE_INSERT_CLUSTERED; + heap = mem_heap_create(1000); entry = row_build_index_entry(node->upd_row, node->upd_ext, index, heap); @@ -1705,23 +1746,104 @@ row_upd_clust_rec_by_insert( row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); - if (change_ownership) { - /* If we return from a lock wait, for example, we may have - extern fields marked as not-owned in entry (marked in the - if-branch above). We must unmark them, take the ownership - back. */ + switch (node->state) { + default: + ut_error; + case UPD_NODE_INSERT_BLOB: + /* A lock wait occurred in row_ins_index_entry() in + the previous invocation of this function. Mark the + off-page columns in the entry inherited. */ - btr_cur_unmark_dtuple_extern_fields(entry); + change_ownership = row_upd_clust_rec_by_insert_inherit( + NULL, NULL, entry, node->update); + ut_a(change_ownership); + /* fall through */ + case UPD_NODE_INSERT_CLUSTERED: + /* A lock wait occurred in row_ins_index_entry() in + the previous invocation of this function. */ + break; + case UPD_NODE_UPDATE_CLUSTERED: + /* This is the first invocation of the function where + we update the primary key. Delete-mark the old record + in the clustered index and prepare to insert a new entry. */ + rec = btr_cur_get_rec(btr_cur); + offsets = rec_get_offsets(rec, index, NULL, + ULINT_UNDEFINED, &heap); + ut_ad(page_rec_is_user_rec(rec)); - /* We must mark non-updated extern fields in entry as - inherited, so that a possible rollback will not free them. */ + err = btr_cur_del_mark_set_clust_rec( + BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur), + rec, index, offsets, TRUE, thr, mtr); + if (err != DB_SUCCESS) { +err_exit: + mtr_commit(mtr); + mem_heap_free(heap); + return(err); + } - btr_cur_mark_dtuple_inherited_extern(entry, node->update); + /* If the the new row inherits externally stored + fields (off-page columns a.k.a. BLOBs) from the + delete-marked old record, mark them disowned by the + old record and owned by the new entry. */ + + if (rec_offs_any_extern(offsets)) { + change_ownership = row_upd_clust_rec_by_insert_inherit( + rec, offsets, entry, node->update); + + if (change_ownership) { + btr_pcur_store_position(pcur, mtr); + } + } + + if (check_ref) { + /* NOTE that the following call loses + the position of pcur ! */ + err = row_upd_check_references_constraints( + node, pcur, table, index, offsets, thr, mtr); + if (err != DB_SUCCESS) { + goto err_exit; + } + } } + mtr_commit(mtr); + err = row_ins_index_entry(index, entry, node->upd_ext ? node->upd_ext->n_ext : 0, TRUE, thr); + node->state = change_ownership + ? UPD_NODE_INSERT_BLOB + : UPD_NODE_INSERT_CLUSTERED; + + if (err == DB_SUCCESS && change_ownership) { + /* Mark the non-updated fields disowned by the old record. */ + + /* NOTE: this transaction has an x-lock on the record + and therefore other transactions cannot modify the + record when we have no latch on the page. In addition, + we assume that other query threads of the same + transaction do not modify the record in the meantime. + Therefore we can assert that the restoration of the + cursor succeeds. */ + + mtr_start(mtr); + + if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr)) { + ut_error; + } + + rec = btr_cur_get_rec(btr_cur); + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, &heap); + ut_ad(page_rec_is_user_rec(rec)); + + btr_cur_disown_inherited_fields( + btr_cur_get_page_zip(btr_cur), + rec, index, offsets, node->update, mtr); + + mtr_commit(mtr); + } + mem_heap_free(heap); return(err); @@ -1865,8 +1987,9 @@ row_upd_del_mark_clust_rec( /* Mark the clustered index record deleted; we do not have to check locks, because we assume that we have an x-lock on the record */ - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); + err = btr_cur_del_mark_set_clust_rec( + BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr); if (err == DB_SUCCESS && check_ref) { /* NOTE that the following call loses the position of pcur ! */ @@ -2083,7 +2206,8 @@ row_upd( } if (node->state == UPD_NODE_UPDATE_CLUSTERED - || node->state == UPD_NODE_INSERT_CLUSTERED) { + || node->state == UPD_NODE_INSERT_CLUSTERED + || node->state == UPD_NODE_INSERT_BLOB) { log_free_check(); err = row_upd_clust_step(node, thr);