diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index af2029bf1e8..6da323867fb 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -1738,8 +1738,8 @@ btr_node_ptr_delete( btr_cur_position(UT_LIST_GET_FIRST(tree->tree_indexes), node_ptr, &cursor); - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, mtr); - + compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE, + mtr); ut_a(err == DB_SUCCESS); if (!compressed) { diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 47a67d425cd..7783f618d6d 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -80,6 +80,9 @@ btr_rec_free_updated_extern_fields( X-latched */ rec_t* rec, /* in: record */ upd_t* update, /* in: update vector */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr); /* in: mini-transaction handle which contains an X-latch to record page and to the tree */ @@ -813,7 +816,7 @@ calculate_sizes_again: /* The record is so big that we have to store some fields externally on separate database pages */ - big_rec_vec = dtuple_convert_big_rec(index, entry); + big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0); if (big_rec_vec == NULL) { @@ -1021,7 +1024,7 @@ btr_cur_pessimistic_insert( /* The record is so big that we have to store some fields externally on separate database pages */ - big_rec_vec = dtuple_convert_big_rec(index, entry); + big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0); if (big_rec_vec == NULL) { @@ -1242,6 +1245,7 @@ btr_cur_update_in_place( rec_t* rec; dulint roll_ptr; trx_t* trx; + ibool was_delete_marked; /* Only clustered index records are updated using this function */ ut_ad((cursor->index)->type & DICT_CLUSTERED); @@ -1270,6 +1274,8 @@ btr_cur_update_in_place( /* FIXME: in a mixed tree, all records may not have enough ordering fields for btr search: */ + + was_delete_marked = rec_get_deleted_flag(rec); 
row_upd_rec_in_place(rec, update); @@ -1279,6 +1285,13 @@ btr_cur_update_in_place( btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr, mtr); + if (was_delete_marked && !rec_get_deleted_flag(rec)) { + /* The new updated record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + return(DB_SUCCESS); } @@ -1434,6 +1447,13 @@ btr_cur_optimistic_update( ut_a(rec); /* <- We calculated above the insert would fit */ + if (!rec_get_deleted_flag(rec)) { + /* The new inserted record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + /* Restore the old explicit lock state on the record */ lock_rec_restore_from_page_infimum(rec, page); @@ -1655,11 +1675,15 @@ btr_cur_pessimistic_update( if (flags & BTR_NO_UNDO_LOG_FLAG) { /* We are in a transaction rollback undoing a row update: we must free possible externally stored fields - which got new values in the update */ + which got new values in the update, if they are not + inherited values. They can be inherited if we have + updated the primary key to another value, and then + update it back again. 
*/ ut_a(big_rec_vec == NULL); - btr_rec_free_updated_extern_fields(index, rec, update, mtr); + btr_rec_free_updated_extern_fields(index, rec, update, + TRUE, mtr); } /* We have to set appropriate extern storage bits in the new @@ -1676,8 +1700,8 @@ btr_cur_pessimistic_update( page_get_free_space_of_empty() / 2) || (rec_get_converted_size(new_entry) >= REC_MAX_DATA_SIZE)) { - big_rec_vec = dtuple_convert_big_rec(index, new_entry); - + big_rec_vec = dtuple_convert_big_rec(index, new_entry, + ext_vect, n_ext_vect); if (big_rec_vec == NULL) { mem_heap_free(heap); @@ -1694,6 +1718,13 @@ btr_cur_pessimistic_update( lock_rec_restore_from_page_infimum(rec, page); rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + if (!rec_get_deleted_flag(rec)) { + /* The new inserted record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; @@ -1725,6 +1756,13 @@ btr_cur_pessimistic_update( rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + if (!rec_get_deleted_flag(rec)) { + /* The new inserted record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + lock_rec_restore_from_page_infimum(rec, page); /* If necessary, restore also the correct lock state for a new, @@ -2183,6 +2221,7 @@ btr_cur_pessimistic_delete( if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit */ + ibool in_rollback,/* in: TRUE if called in rollback */ mtr_t* mtr) /* in: mtr */ { page_t* page; @@ -2218,7 +2257,8 @@ btr_cur_pessimistic_delete( } btr_rec_free_externally_stored_fields(cursor->index, - btr_cur_get_rec(cursor), mtr); + btr_cur_get_rec(cursor), in_rollback, mtr); + if ((page_get_n_recs(page) < 2) && (dict_tree_get_page(btr_cur_get_tree(cursor)) != buf_frame_get_page_no(page))) { @@ -2516,6 +2556,199 @@ btr_estimate_number_of_different_key_vals( /*================== EXTERNAL 
STORAGE OF BIG FIELDS ===================*/ +/*********************************************************************** +Sets the ownership bit of an externally stored field in a record. */ +static +void +btr_cur_set_ownership_of_extern_field( +/*==================================*/ + rec_t* rec, /* in: clustered index record */ + ulint i, /* in: field number */ + ibool val, /* in: TRUE if rec owns the field */ + mtr_t* mtr) /* in: mtr */ +{ + byte* data; + ulint local_len; + ulint byte_val; + + data = rec_get_nth_field(rec, i, &local_len); + + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + local_len -= BTR_EXTERN_FIELD_REF_SIZE; + + byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN); + + if (val) { + byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); + } else { + byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; + } + + mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val, + MLOG_1BYTE, mtr); +} + +/*********************************************************************** +Marks not updated extern fields as not-owned by this record. The ownership +is transferred to the updated record which is inserted elsewhere in the +index tree. In purge only the owner of externally stored field is allowed +to free the field. 
*/ + +void +btr_cur_mark_extern_inherited_fields( +/*=================================*/ + rec_t* rec, /* in: record in a clustered index */ + upd_t* update, /* in: update vector, or NULL */ + mtr_t* mtr) /* in: mtr */ +{ + ibool is_updated; + ulint n; + ulint j; + ulint i; + + n = rec_get_n_fields(rec); + + for (i = 0; i < n; i++) { + if (rec_get_nth_field_extern_bit(rec, i)) { + + /* Check it is not in updated fields */ + is_updated = FALSE; + + if (update) { + for (j = 0; j < upd_get_n_fields(update); + j++) { + if (upd_get_nth_field(update, j) + ->field_no == i) { + is_updated = TRUE; + } + } + } + + if (!is_updated) { + btr_cur_set_ownership_of_extern_field(rec, i, + FALSE, mtr); + } + } + } +} + +/*********************************************************************** +The complement of the previous function: in an update entry may inherit +some externally stored fields from a record. We must mark them as inherited +in entry, so that they are not freed in a rollback. */ + +void +btr_cur_mark_dtuple_inherited_extern( +/*=================================*/ + dtuple_t* entry, /* in: updated entry to be inserted to + clustered index */ + ulint* ext_vec, /* in: array of extern fields in the + original record */ + ulint n_ext_vec, /* in: number of elements in ext_vec */ + upd_t* update) /* in: update vector */ +{ + dfield_t* dfield; + ulint byte_val; + byte* data; + ulint len; + ibool is_updated; + ulint j; + ulint i; + + if (ext_vec == NULL) { + + return; + } + + for (i = 0; i < n_ext_vec; i++) { + + /* Check ext_vec[i] is in updated fields */ + is_updated = FALSE; + + for (j = 0; j < upd_get_n_fields(update); j++) { + if (upd_get_nth_field(update, j)->field_no + == ext_vec[i]) { + is_updated = TRUE; + } + } + + if (!is_updated) { + dfield = dtuple_get_nth_field(entry, ext_vec[i]); + + data = dfield_get_data(dfield); + len = dfield_get_len(dfield); + + len -= BTR_EXTERN_FIELD_REF_SIZE; + + byte_val = mach_read_from_1(data + len + + BTR_EXTERN_LEN); + + byte_val = byte_val 
| BTR_EXTERN_INHERITED_FLAG; + + mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val); + } + } +} + +/*********************************************************************** +Marks all extern fields in a record as owned by the record. This function +should be called if the delete mark of a record is removed: a not delete +marked record always owns all its extern fields. */ + +void +btr_cur_unmark_extern_fields( +/*=========================*/ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr) /* in: mtr */ +{ + ulint n; + ulint i; + + n = rec_get_n_fields(rec); + + for (i = 0; i < n; i++) { + if (rec_get_nth_field_extern_bit(rec, i)) { + + btr_cur_set_ownership_of_extern_field(rec, i, + TRUE, mtr); + } + } +} + +/*********************************************************************** +Marks all extern fields in a dtuple as owned by the record. */ + +void +btr_cur_unmark_dtuple_extern_fields( +/*================================*/ + dtuple_t* entry, /* in: clustered index entry */ + ulint* ext_vec, /* in: array of numbers of fields + which have been stored externally */ + ulint n_ext_vec) /* in: number of elements in ext_vec */ +{ + dfield_t* dfield; + ulint byte_val; + byte* data; + ulint len; + ulint i; + + for (i = 0; i < n_ext_vec; i++) { + dfield = dtuple_get_nth_field(entry, ext_vec[i]); + + data = dfield_get_data(dfield); + len = dfield_get_len(dfield); + + len -= BTR_EXTERN_FIELD_REF_SIZE; + + byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN); + + byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); + + mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val); + } +} + /*********************************************************************** Stores the positions of the fields marked as extern storage in the update vector, and also those fields who are marked as extern storage in rec @@ -2766,7 +2999,9 @@ btr_store_big_rec_extern_fields( /*********************************************************************** Frees the space in an externally 
stored field to the file space -management. */ +management if the field in data owns the externally stored field; +in a rollback we may have the additional condition that the field must +not be inherited. */ void btr_free_externally_stored_field( @@ -2777,6 +3012,9 @@ btr_free_externally_stored_field( + reference to the externally stored part */ ulint local_len, /* in: length of data */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* local_mtr) /* in: mtr containing the latch to data an an X-latch to the index tree */ @@ -2828,6 +3066,26 @@ btr_free_externally_stored_field( return; } + if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN) + & BTR_EXTERN_OWNER_FLAG) { + /* This field does not own the externally + stored field: do not free! */ + + mtr_commit(&mtr); + + return; + } + + if (do_not_free_inherited + && mach_read_from_1(data + local_len + BTR_EXTERN_LEN) + & BTR_EXTERN_INHERITED_FLAG) { + /* Rollback and inherited field: do not free! 
*/ + + mtr_commit(&mtr); + + return; + } + page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr); buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE); @@ -2872,6 +3130,9 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr) /* in: mini-transaction handle which contains an X-latch to record page and to the index tree */ @@ -2896,7 +3157,8 @@ btr_rec_free_externally_stored_fields( if (rec_get_nth_field_extern_bit(rec, i)) { data = rec_get_nth_field(rec, i, &len); - btr_free_externally_stored_field(index, data, len, mtr); + btr_free_externally_stored_field(index, data, len, + do_not_free_inherited, mtr); } } } @@ -2912,6 +3174,9 @@ btr_rec_free_updated_extern_fields( X-latched */ rec_t* rec, /* in: record */ upd_t* update, /* in: update vector */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr) /* in: mini-transaction handle which contains an X-latch to record page and to the tree */ { @@ -2938,7 +3203,8 @@ btr_rec_free_updated_extern_fields( if (rec_get_nth_field_extern_bit(rec, ufield->field_no)) { data = rec_get_nth_field(rec, ufield->field_no, &len); - btr_free_externally_stored_field(index, data, len, mtr); + btr_free_externally_stored_field(index, data, len, + do_not_free_inherited, mtr); } } } diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index ac4e7c5ba3f..616f8911aba 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -769,6 +769,11 @@ btr_search_guess_on_hash( buf_page_make_young(page); } + /* Increment the page get statistics though we did not really + fix the page: for user info only */ + + buf_pool->n_page_gets++; + return(TRUE); /*-------------------------------------------*/ diff --git a/innobase/buf/buf0buf.c 
b/innobase/buf/buf0buf.c index 3fabe6c6d0e..f485088a5b7 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -349,6 +349,10 @@ buf_pool_create( buf_pool->n_pages_written = 0; buf_pool->n_pages_created = 0; + buf_pool->n_page_gets = 0; + buf_pool->n_page_gets_old = 0; + buf_pool->n_pages_read_old = 0; + /* 2. Initialize flushing fields ---------------------------- */ UT_LIST_INIT(buf_pool->flush_list); @@ -667,6 +671,7 @@ buf_page_get_gen( #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, offset)); #endif + buf_pool->n_page_gets++; loop: mutex_enter_fast(&(buf_pool->mutex)); @@ -846,6 +851,8 @@ buf_page_optimistic_get_func( ut_ad(mtr && guess); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + buf_pool->n_page_gets++; + block = buf_block_align(guess); mutex_enter(&(buf_pool->mutex)); @@ -976,6 +983,8 @@ buf_page_get_known_nowait( ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + buf_pool->n_page_gets++; + block = buf_block_align(guess); mutex_enter(&(buf_pool->mutex)); @@ -1643,6 +1652,18 @@ buf_print_io(void) printf("Pages read %lu, created %lu, written %lu\n", buf_pool->n_pages_read, buf_pool->n_pages_created, buf_pool->n_pages_written); + + if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { + printf("Buffer pool hit rate %lu / 1000\n", + 1000 + - ((1000 * + (buf_pool->n_pages_read - buf_pool->n_pages_read_old)) + / (buf_pool->n_page_gets - buf_pool->n_page_gets_old))); + } + + buf_pool->n_page_gets_old = buf_pool->n_page_gets; + buf_pool->n_pages_read_old = buf_pool->n_pages_read; + mutex_exit(&(buf_pool->mutex)); } diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index 4172fb9c8ce..97db9d72f98 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -395,7 +395,12 @@ dtuple_convert_big_rec( the entry enough, i.e., if there are too many short fields in entry */ dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry */ + dtuple_t* entry, 
/* in: index entry */ + ulint* ext_vec,/* in: array of externally stored fields, + or NULL: if a field already is externally + stored, then we cannot move it to the vector + this function returns */ + ulint n_ext_vec)/* in: number of elements in ext_vec */ { mem_heap_t* heap; big_rec_t* vector; @@ -404,7 +409,9 @@ dtuple_convert_big_rec( ulint n_fields; ulint longest; ulint longest_i; + ibool is_externally_stored; ulint i; + ulint j; size = rec_get_converted_size(entry); @@ -431,9 +438,23 @@ dtuple_convert_big_rec( for (i = dict_index_get_n_unique_in_tree(index); i < dtuple_get_n_fields(entry); i++) { + /* Skip over fields which already are externally + stored */ + + is_externally_stored = FALSE; + + if (ext_vec) { + for (j = 0; j < n_ext_vec; j++) { + if (ext_vec[j] == i) { + is_externally_stored = TRUE; + } + } + } + /* Skip over fields which are ordering in some index */ - if (dict_field_get_col( + if (!is_externally_stored && + dict_field_get_col( dict_index_get_nth_field(index, i)) ->ord_part == 0) { diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index 5c783627721..b386f224d11 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -19,6 +19,7 @@ Created 10/25/1995 Heikki Tuuri #include "log0log.h" #include "log0recv.h" #include "fsp0fsp.h" +#include "srv0srv.h" /* IMPLEMENTATION OF THE LOW-LEVEL FILE SYSTEM @@ -1152,6 +1153,7 @@ fil_aio_wait( ut_ad(fil_validate()); if (os_aio_use_native_aio) { + srv_io_thread_op_info[segment] = "native aio handle"; #ifdef WIN_ASYNC_IO ret = os_aio_windows_handle(segment, 0, &fil_node, &message, &type); @@ -1161,12 +1163,16 @@ fil_aio_wait( ut_a(0); #endif } else { + srv_io_thread_op_info[segment] = "simulated aio handle"; + ret = os_aio_simulated_handle(segment, (void**) &fil_node, &message, &type); } ut_a(ret); - + + srv_io_thread_op_info[segment] = "complete io for fil node"; + mutex_enter(&(system->mutex)); fil_node_complete_io(fil_node, fil_system, type); @@ -1178,9 +1184,10 @@ fil_aio_wait( /* 
Do the i/o handling */ if (buf_pool_is_block(message)) { - + srv_io_thread_op_info[segment] = "complete io for buf page"; buf_page_io_complete(message); } else { + srv_io_thread_op_info[segment] = "complete io for log"; log_io_complete(message); } } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index fd7b415551f..fa1c630dc08 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -2341,7 +2341,7 @@ ibuf_delete_rec( root = ibuf_tree_root_get(ibuf_data, space, mtr); btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), - mtr); + FALSE, mtr); ut_a(err == DB_SUCCESS); #ifdef UNIV_IBUF_DEBUG diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index ffae434a5d9..f56a5662253 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -353,6 +353,7 @@ btr_cur_pessimistic_delete( if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit */ + ibool in_rollback,/* in: TRUE if called in rollback */ mtr_t* mtr); /* in: mtr */ /*************************************************************** Parses a redo log record of updating a record in-place. */ @@ -418,6 +419,52 @@ btr_estimate_number_of_different_key_vals( /* out: estimated number of key values */ dict_index_t* index); /* in: index */ /*********************************************************************** +Marks not updated extern fields as not-owned by this record. The ownership +is transferred to the updated record which is inserted elsewhere in the +index tree. In purge only the owner of externally stored field is allowed +to free the field. 
*/ + +void +btr_cur_mark_extern_inherited_fields( +/*=================================*/ + rec_t* rec, /* in: record in a clustered index */ + upd_t* update, /* in: update vector */ + mtr_t* mtr); /* in: mtr */ +/*********************************************************************** +The complement of the previous function: in an update entry may inherit +some externally stored fields from a record. We must mark them as inherited +in entry, so that they are not freed in a rollback. */ + +void +btr_cur_mark_dtuple_inherited_extern( +/*=================================*/ + dtuple_t* entry, /* in: updated entry to be inserted to + clustered index */ + ulint* ext_vec, /* in: array of extern fields in the + original record */ + ulint n_ext_vec, /* in: number of elements in ext_vec */ + upd_t* update); /* in: update vector */ +/*********************************************************************** +Marks all extern fields in a record as owned by the record. This function +should be called if the delete mark of a record is removed: a not delete +marked record always owns all its extern fields. */ + +void +btr_cur_unmark_extern_fields( +/*=========================*/ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr); /* in: mtr */ +/*********************************************************************** +Marks all extern fields in a dtuple as owned by the record. */ + +void +btr_cur_unmark_dtuple_extern_fields( +/*================================*/ + dtuple_t* entry, /* in: clustered index entry */ + ulint* ext_vec, /* in: array of numbers of fields + which have been stored externally */ + ulint n_ext_vec); /* in: number of elements in ext_vec */ +/*********************************************************************** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The fields are stored on pages allocated from leaf node file segment of the index tree. 
*/ @@ -435,7 +482,9 @@ btr_store_big_rec_extern_fields( rec and to the tree */ /*********************************************************************** Frees the space in an externally stored field to the file space -management. */ +management if the field in data owns the externally stored field; +in a rollback we may have the additional condition that the field must +not be inherited. */ void btr_free_externally_stored_field( @@ -446,6 +495,9 @@ btr_free_externally_stored_field( + reference to the externally stored part */ ulint local_len, /* in: length of data */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* local_mtr); /* in: mtr containing the latch to data an an X-latch to the index tree */ @@ -458,6 +510,9 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr); /* in: mini-transaction handle which contains an X-latch to record page and to the index tree */ @@ -620,10 +675,21 @@ and sleep this many microseconds in between */ on that page */ #define BTR_EXTERN_LEN 12 /* 8 bytes containing the length of the externally - stored part of the BLOB */ + stored part of the BLOB. + The 2 highest bits are + reserved for the flags below. */ /*--------------------------------------*/ #define BTR_EXTERN_FIELD_REF_SIZE 20 +/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte +at lowest address) is set to 1 if this field does not 'own' the externally +stored field; only the owner field is allowed to free the field in purge! +If the 2nd highest bit is 1 then it means that the externally stored field +was inherited from an earlier version of the row. In rollback we are not +allowed to free an inherited external field. 
*/ + +#define BTR_EXTERN_OWNER_FLAG 128 +#define BTR_EXTERN_INHERITED_FLAG 64 extern ulint btr_cur_n_non_sea; diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 8b22561adf8..66071030402 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -771,6 +771,17 @@ struct buf_pool_struct{ ulint n_pages_written;/* number write operations */ ulint n_pages_created;/* number of pages created in the pool with no read */ + ulint n_page_gets; /* number of page gets performed; + also successful searches through + the adaptive hash index are + counted as page gets; this field + is NOT protected by the buffer + pool mutex */ + ulint n_page_gets_old;/* n_page_gets when buf_print_io was + last time called: used to calculate + hit rate */ + ulint n_pages_read_old;/* n_pages_read when buf_print_io was + last time called */ /* 2. Page flushing algorithm fields */ UT_LIST_BASE_NODE_T(buf_block_t) flush_list; diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index f695e0989a5..c314281d758 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -329,7 +329,12 @@ dtuple_convert_big_rec( the entry enough, i.e., if there are too many short fields in entry */ dict_index_t* index, /* in: index */ - dtuple_t* entry); /* in: index entry */ + dtuple_t* entry, /* in: index entry */ + ulint* ext_vec,/* in: array of externally stored fields, + or NULL: if a field already is externally + stored, then we cannot move it to the vector + this function returns */ + ulint n_ext_vec);/* in: number of elements in ext_vec */ /****************************************************************** Puts back to entry the data stored in vector. 
Note that to ensure the fields in entry can accommodate the data, vector must have been created diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index e635964e5ec..3f014adb76c 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -62,7 +62,15 @@ extern int srv_query_thread_priority; /*-------------------------------------------*/ +extern ulint srv_n_rows_inserted; +extern ulint srv_n_rows_updated; +extern ulint srv_n_rows_deleted; +extern ulint srv_n_rows_read; + extern ibool srv_print_innodb_monitor; +extern ibool srv_print_innodb_lock_monitor; +extern ibool srv_print_innodb_tablespace_monitor; + extern ulint srv_n_spin_wait_rounds; extern ulint srv_spin_wait_delay; extern ibool srv_priority_boost; @@ -105,13 +113,19 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, it from dynamic memory to get it to the same DRAM page as other hotspot semaphores */ #define kernel_mutex (*kernel_mutex_temp) + +#define SRV_MAX_N_IO_THREADS 100 +/* Array of English strings describing the current state of an +i/o handler thread */ +extern char* srv_io_thread_op_info[]; + typedef struct srv_sys_struct srv_sys_t; /* The server system */ extern srv_sys_t* srv_sys; -/* Alternatives for fiel flush option in Unix; see the InnoDB manual about +/* Alternatives for the file flush option in Unix; see the InnoDB manual about what these mean */ #define SRV_UNIX_FDATASYNC 1 #define SRV_UNIX_O_DSYNC 2 diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h index e26f7e19850..0295cd6abff 100644 --- a/innobase/include/trx0sys.h +++ b/innobase/include/trx0sys.h @@ -315,6 +315,9 @@ struct trx_sys_struct{ /* List of active and committed in memory transactions, sorted on trx id, biggest first */ + UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list; + /* List of transactions created + for MySQL */ UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list; /* List of rollback segment objects */ trx_rseg_t* latest_rseg; /* Latest rollback 
segment in the diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index f67ba43162d..fdef041e929 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -130,6 +130,14 @@ void trx_mark_sql_stat_end( /*==================*/ trx_t* trx); /* in: trx handle */ +/************************************************************************** +Marks the latest SQL statement ended but does not start a new transaction +if the trx is not started. */ + +void +trx_mark_sql_stat_end_do_not_start_new( +/*===================================*/ + trx_t* trx); /* in: trx handle */ /************************************************************************ Assigns a read view for a consistent read query. All the consistent reads within the same transaction will get the same read view, which is created @@ -236,6 +244,14 @@ trx_commit_step( /*============*/ /* out: query thread to run next, or NULL */ que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Prints info about a transaction to the standard output. The caller must +own the kernel mutex. 
*/ + +void +trx_print( +/*======*/ + trx_t* trx); /* in: transaction */ /* Signal to a transaction */ @@ -270,6 +286,9 @@ rolling back after a database recovery */ struct trx_struct{ /* All the next fields are protected by the kernel mutex, except the undo logs which are protected by undo_mutex */ + char* op_info; /* English text describing the + current operation, or an empty + string */ ulint type; /* TRX_USER, TRX_PURGE */ ulint conc_state; /* state of the trx from the point of view of concurrency control: @@ -284,6 +303,8 @@ struct trx_struct{ table */ dulint table_id; /* table id if the preceding field is TRUE */ + void* mysql_thd; /* MySQL thread handle corresponding + to this trx, or NULL */ os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated with this transaction object */ ulint n_mysql_tables_in_use; /* number of Innobase tables @@ -302,6 +323,9 @@ struct trx_struct{ of a duplicate key error */ UT_LIST_NODE_T(trx_t) trx_list; /* list of transactions */ + UT_LIST_NODE_T(trx_t) + mysql_trx_list; /* list of transactions created for + MySQL */ /*------------------------------*/ mutex_t undo_mutex; /* mutex protecting the fields in this section (down to undo_no_arr), EXCEPT diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index 5f8f538f392..819c559ceb4 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -13,6 +13,7 @@ Created 5/7/1996 Heikki Tuuri #endif #include "usr0sess.h" +#include "trx0purge.h" /* When releasing transaction locks, this specifies how often we release the kernel mutex for a moment to give also others access to it */ @@ -3184,7 +3185,7 @@ lock_table_print( ut_ad(mutex_own(&kernel_mutex)); ut_a(lock_get_type(lock) == LOCK_TABLE); - printf("\nTABLE LOCK table %s trx id %lu %lu", + printf("TABLE LOCK table %s trx id %lu %lu", lock->un_member.tab_lock.table->name, (lock->trx)->id.high, (lock->trx)->id.low); @@ -3220,6 +3221,8 @@ lock_rec_print( ulint page_no; ulint i; ulint count = 0; + 
ulint len; + char buf[200]; mtr_t mtr; ut_ad(mutex_own(&kernel_mutex)); @@ -3228,7 +3231,7 @@ lock_rec_print( space = lock->un_member.rec_lock.space; page_no = lock->un_member.rec_lock.page_no; - printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu", + printf("RECORD LOCKS space id %lu page no %lu n bits %lu", space, page_no, lock_rec_get_n_bits(lock)); printf(" table %s index %s trx id %lu %lu", @@ -3251,10 +3254,10 @@ lock_rec_print( printf(" waiting"); } - printf("\n"); - mtr_start(&mtr); + printf("\n"); + /* If the page is not in the buffer pool, we cannot load it because we have the kernel mutex and ibuf operations would break the latching order */ @@ -3280,12 +3283,14 @@ lock_rec_print( printf("Record lock, heap no %lu ", i); if (page) { - rec_print(page_find_rec_with_heap_no(page, i)); + len = rec_sprintf(buf, 190, + page_find_rec_with_heap_no(page, i)); + buf[len] = '\0'; + printf("%s", buf); } - count++; - printf("\n"); + count++; } if (count >= 3) { @@ -3342,12 +3347,32 @@ lock_print_info(void) ulint nth_lock = 0; ulint i; mtr_t mtr; - - lock_mutex_enter_kernel(); - printf("LOCK INFO:\n"); - printf("Number of locks in the record hash table %lu\n", + printf( + "Purge done for all trx's with n:o < %lu %lu, undo n:o < %lu %lu\n", + ut_dulint_get_high(purge_sys->purge_trx_no), + ut_dulint_get_low(purge_sys->purge_trx_no), + ut_dulint_get_high(purge_sys->purge_undo_no), + ut_dulint_get_low(purge_sys->purge_undo_no)); + + lock_mutex_enter_kernel(); + + printf("Total number of lock structs in row lock hash table %lu\n", lock_get_n_rec_locks()); + + /* First print info on non-active transactions */ + + trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + + while (trx) { + if (trx->conc_state == TRX_NOT_STARTED) { + printf("---"); + trx_print(trx); + } + + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); + } + loop: trx = UT_LIST_GET_FIRST(trx_sys->trx_list); @@ -3367,11 +3392,21 @@ loop: } if (nth_lock == 0) { - printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", 
trx->id.high, - trx->id.low); + printf("---"); + trx_print(trx); + + if (trx->read_view) { + printf( + "Trx read view will not see trx with id >= %lu %lu, sees < %lu %lu\n", + ut_dulint_get_high(trx->read_view->low_limit_id), + ut_dulint_get_low(trx->read_view->low_limit_id), + ut_dulint_get_high(trx->read_view->up_limit_id), + ut_dulint_get_low(trx->read_view->up_limit_id)); + } + if (trx->que_state == TRX_QUE_LOCK_WAIT) { printf( - "################# TRX IS WAITING FOR THE LOCK: ###\n"); + "------------------TRX IS WAITING FOR THE LOCK:\n"); if (lock_get_type(trx->wait_lock) == LOCK_REC) { lock_rec_print(trx->wait_lock); @@ -3380,10 +3415,15 @@ loop: } printf( - "##################################################\n"); + "------------------\n"); } } + if (!srv_print_innodb_lock_monitor) { + nth_trx++; + goto loop; + } + i = 0; lock = UT_LIST_GET_FIRST(trx->trx_locks); @@ -3431,9 +3471,9 @@ loop: nth_lock++; - if (nth_lock >= 25) { + if (nth_lock >= 10) { printf( - "25 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n"); + "10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n"); nth_trx++; nth_lock = 0; diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index 0525fd7b59a..d4d30f6aabc 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -1577,6 +1577,7 @@ os_aio_windows_handle( void** message2, ulint* type) /* out: OS_FILE_WRITE or ..._READ */ { + ulint orig_seg = segment; os_aio_array_t* array; os_aio_slot_t* slot; ulint n; @@ -1602,10 +1603,14 @@ os_aio_windows_handle( n = array->n_slots / array->n_segments; if (array == os_aio_sync_array) { + srv_io_thread_op_info[orig_seg] = "wait windows aio for 1 page"; + ut_ad(pos < array->n_slots); os_event_wait(array->events[pos]); i = pos; } else { + srv_io_thread_op_info[orig_seg] = + "wait windows aio for n pages"; i = os_event_wait_multiple(n, (array->events) + segment * n); } @@ -1615,6 +1620,7 @@ os_aio_windows_handle( ut_a(slot->reserved); + srv_io_thread_op_info[orig_seg] = "get 
windows aio return value"; ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); *message1 = slot->message1; @@ -1887,6 +1893,8 @@ consecutive_loop: } } + srv_io_thread_op_info[global_segment] = "doing file i/o"; + /* Do the i/o with ordinary, synchronous i/o functions: */ if (slot->type == OS_FILE_WRITE) { ret = os_file_write(slot->name, slot->file, combined_buf, @@ -1897,7 +1905,8 @@ consecutive_loop: } ut_a(ret); - + srv_io_thread_op_info[global_segment] = "file i/o done"; + /* printf("aio: %lu consecutive %lu:th segment, first offs %lu blocks\n", n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE); */ @@ -1953,6 +1962,8 @@ wait_for_io: os_mutex_exit(array->mutex); + srv_io_thread_op_info[global_segment] = "waiting for i/o request"; + os_event_wait(os_aio_segment_wait_events[global_segment]); goto restart; @@ -2023,7 +2034,12 @@ os_aio_print(void) ulint n_reserved; ulint i; - printf("Pending normal aio reads:\n"); + for (i = 0; i < srv_n_file_io_threads; i++) { + printf("I/O thread %lu state: %s\n", i, + srv_io_thread_op_info[i]); + } + + printf("Pending normal aio reads: "); array = os_aio_read_array; loop: @@ -2041,21 +2057,21 @@ loop: if (slot->reserved) { n_reserved++; - printf("Reserved slot, messages %lx %lx\n", + /* printf("Reserved slot, messages %lx %lx\n", (ulint)slot->message1, (ulint)slot->message2); - ut_a(slot->len > 0); + */ ut_a(slot->len > 0); } } ut_a(array->n_reserved == n_reserved); - printf("Total of %lu reserved aio slots\n", n_reserved); + printf("%lu\n", n_reserved); os_mutex_exit(array->mutex); if (array == os_aio_read_array) { - printf("Pending aio writes:\n"); + printf("Pending aio writes: "); array = os_aio_write_array; @@ -2063,21 +2079,21 @@ loop: } if (array == os_aio_write_array) { - printf("Pending insert buffer aio reads:\n"); + printf("Pending insert buffer aio reads: "); array = os_aio_ibuf_array; goto loop; } if (array == os_aio_ibuf_array) { - printf("Pending log writes or reads:\n"); + 
printf("Pending log writes or reads: "); array = os_aio_log_array; goto loop; } if (array == os_aio_log_array) { - printf("Pending synchronous reads or writes:\n"); + printf("Pending synchronous reads or writes: "); array = os_aio_sync_array; goto loop; diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 9bbc45a5c9a..373ee4ac4bd 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -462,6 +462,8 @@ row_insert_for_mysql( ut_ad(trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + trx->op_info = "inserting"; + if (node == NULL) { row_get_prebuilt_insert_row(prebuilt); node = prebuilt->ins_node; @@ -499,6 +501,8 @@ run_again: goto run_again; } + trx->op_info = ""; + return(err); } @@ -506,12 +510,15 @@ run_again: prebuilt->table->stat_n_rows++; + srv_n_rows_inserted++; + if (prebuilt->table->stat_n_rows == 0) { /* Avoid wrap-over */ prebuilt->table->stat_n_rows--; } row_update_statistics_if_needed(prebuilt); + trx->op_info = ""; return((int) err); } @@ -627,6 +634,8 @@ row_update_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); UT_NOT_USED(mysql_rec); + trx->op_info = "updating or deleting"; + node = prebuilt->upd_node; clust_index = dict_table_get_first_index(table); @@ -700,6 +709,7 @@ run_again: if (err == DB_RECORD_NOT_FOUND) { trx->error_state = DB_SUCCESS; + trx->op_info = ""; return((int) err); } @@ -710,6 +720,8 @@ run_again: goto run_again; } + trx->op_info = ""; + return(err); } @@ -719,10 +731,16 @@ run_again: if (prebuilt->table->stat_n_rows > 0) { prebuilt->table->stat_n_rows--; } - } + + srv_n_rows_deleted++; + } else { + srv_n_rows_updated++; + } row_update_statistics_if_needed(prebuilt); + trx->op_info = ""; + return((int) err); } @@ -798,6 +816,8 @@ row_create_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + trx->op_info = "creating table"; + /* Serialize data dictionary operations with dictionary mutex: no deadlocks can occur then in these operations */ @@ 
-825,16 +845,22 @@ row_create_table_for_mysql( trx_general_rollback_for_mysql(trx, FALSE, NULL); if (err == DB_OUT_OF_FILE_SPACE) { - row_drop_table_for_mysql(table->name, trx, TRUE); + row_drop_table_for_mysql(table->name, trx, TRUE); } else { - assert(err == DB_DUPLICATE_KEY); - fprintf(stderr, + assert(err == DB_DUPLICATE_KEY); + fprintf(stderr, "InnoDB: Error: table %s already exists in InnoDB internal\n" "InnoDB: data dictionary. Have you deleted the .frm file\n" "InnoDB: and not used DROP TABLE? Have you used DROP DATABASE\n" - "InnoDB: for InnoDB tables in MySQL version <= 3.23.39?\n" + "InnoDB: for InnoDB tables in MySQL version <= 3.23.42?\n" "InnoDB: See the Restrictions section of the InnoDB manual.\n", table->name); + fprintf(stderr, + "InnoDB: You can drop the orphaned table inside InnoDB by\n" + "InnoDB: creating an InnoDB table with the same name in another\n" + "InnoDB: database and moving the .frm file to the current database.\n" + "InnoDB: Then MySQL thinks the table exists, and DROP TABLE will\n" + "InnoDB: succeed.\n"); } trx->error_state = DB_SUCCESS; @@ -852,11 +878,32 @@ row_create_table_for_mysql( srv_print_innodb_monitor = TRUE; } + + keywordlen = ut_strlen("innodb_lock_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_lock_monitor", keywordlen)) { + + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + } + + keywordlen = ut_strlen("innodb_tablespace_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_tablespace_monitor", keywordlen)) { + + srv_print_innodb_tablespace_monitor = TRUE; + } } mutex_exit(&(dict_sys->mutex)); que_graph_free((que_t*) que_node_get_parent(thr)); - + + trx->op_info = ""; + return((int) err); } @@ -879,6 +926,8 @@ row_create_index_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + trx->op_info = "creating index"; + /* Serialize data dictionary operations with 
dictionary mutex: no deadlocks can occur then in these operations */ @@ -915,6 +964,8 @@ row_create_index_for_mysql( que_graph_free((que_t*) que_node_get_parent(thr)); + trx->op_info = ""; + return((int) err); } @@ -945,7 +996,9 @@ row_drop_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_a(name != NULL); - + + trx->op_info = "dropping table"; + namelen = ut_strlen(name); keywordlen = ut_strlen("innodb_monitor"); @@ -957,6 +1010,26 @@ row_drop_table_for_mysql( stop monitor prints */ srv_print_innodb_monitor = FALSE; + srv_print_innodb_lock_monitor = FALSE; + } + + keywordlen = ut_strlen("innodb_lock_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_lock_monitor", keywordlen)) { + + srv_print_innodb_monitor = FALSE; + srv_print_innodb_lock_monitor = FALSE; + } + + keywordlen = ut_strlen("innodb_tablespace_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_tablespace_monitor", keywordlen)) { + + srv_print_innodb_tablespace_monitor = FALSE; } /* We use the private SQL parser of Innobase to generate the @@ -1071,6 +1144,8 @@ funct_exit: que_graph_free(graph); + trx->op_info = ""; + return((int) err); } @@ -1099,6 +1174,8 @@ row_rename_table_for_mysql( ut_a(old_name != NULL); ut_a(new_name != NULL); + trx->op_info = "renaming table"; + str1 = "PROCEDURE RENAME_TABLE_PROC () IS\n" "BEGIN\n" @@ -1168,6 +1245,8 @@ funct_exit: que_graph_free(graph); + trx->op_info = ""; + return((int) err); } @@ -1279,6 +1358,8 @@ row_check_table_for_mysql( ulint n_rows; ulint n_rows_in_table; ulint ret = DB_SUCCESS; + + prebuilt->trx->op_info = "checking table"; index = dict_table_get_first_index(table); @@ -1311,5 +1392,7 @@ row_check_table_for_mysql( index = dict_table_get_next_index(index); } + prebuilt->trx->op_info = ""; + return(ret); } diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index ec880d3fe04..43bc166347a 100644 --- 
a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -132,7 +132,7 @@ row_purge_remove_clust_if_poss_low( success = btr_cur_optimistic_delete(btr_cur, &mtr); } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr); if (err == DB_SUCCESS) { success = TRUE; @@ -254,8 +254,8 @@ row_purge_remove_sec_if_poss_low( success = btr_cur_optimistic_delete(btr_cur, &mtr); } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); - + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, + FALSE, &mtr); if (err == DB_SUCCESS) { success = TRUE; } else if (err == DB_OUT_OF_FILE_SPACE) { @@ -437,7 +437,7 @@ skip_secondaries: data_field_len = ufield->new_val.len; btr_free_externally_stored_field(index, data_field, - data_field_len, &mtr); + data_field_len, FALSE, &mtr); mtr_commit(&mtr); } } diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 0ad6b7084e2..d041e34a558 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -2488,6 +2488,8 @@ row_search_for_mysql( printf("N tables locked %lu\n", trx->mysql_n_tables_locked); */ if (direction == 0) { + trx->op_info = "starting index read"; + prebuilt->n_rows_fetched = 0; prebuilt->n_fetch_cached = 0; prebuilt->fetch_cache_first = 0; @@ -2497,6 +2499,8 @@ row_search_for_mysql( row_prebuild_sel_graph(prebuilt); } } else { + trx->op_info = "fetching rows"; + if (prebuilt->n_rows_fetched == 0) { prebuilt->fetch_direction = direction; } @@ -2519,6 +2523,9 @@ row_search_for_mysql( prebuilt->n_rows_fetched++; + srv_n_rows_read++; + trx->op_info = ""; + return(DB_SUCCESS); } @@ -2529,6 +2536,7 @@ row_search_for_mysql( cache, but the cache was not full at the time of the popping: no more rows can exist in the result set */ + trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2560,6 +2568,7 @@ row_search_for_mysql( /* printf("%s record not found 1\n", index->name); */ + 
trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2599,6 +2608,9 @@ row_search_for_mysql( /* printf("%s shortcut\n", index->name); */ + srv_n_rows_read++; + + trx->op_info = ""; return(DB_SUCCESS); } else if (shortcut == SEL_EXHAUSTED) { @@ -2607,6 +2619,7 @@ row_search_for_mysql( /* printf("%s record not found 2\n", index->name); */ + trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2980,6 +2993,8 @@ lock_wait_or_error: /* printf("Using index %s cnt %lu ret value %lu err\n", index->name, cnt, err); */ + trx->op_info = ""; + return(err); normal_return: @@ -2995,5 +3010,11 @@ normal_return: /* printf("Using index %s cnt %lu ret value %lu\n", index->name, cnt, err); */ + if (ret == DB_SUCCESS) { + srv_n_rows_read++; + } + + trx->op_info = ""; + return(ret); } diff --git a/innobase/row/row0uins.c b/innobase/row/row0uins.c index 47807877779..27d1fbcb9ba 100644 --- a/innobase/row/row0uins.c +++ b/innobase/row/row0uins.c @@ -89,7 +89,7 @@ retry: &(node->pcur), &mtr); ut_a(success); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -174,7 +174,7 @@ row_undo_ins_remove_sec_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); } btr_pcur_close(&pcur); diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index 0221c51b985..a7c8957d61a 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -179,7 +179,11 @@ row_undo_mod_remove_clust_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr); + /* Note that since this operation is analogous to purge, + we can free also inherited externally stored fields: + hence the last FALSE in the call below */ + + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, mtr); 
/* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -356,7 +360,8 @@ row_undo_mod_del_mark_or_remove_sec_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, + TRUE, &mtr); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -423,22 +428,22 @@ row_undo_mod_del_unmark_sec( found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); if (!found) { - err_buf = mem_alloc(1000); - dtuple_sprintf(err_buf, 900, entry); + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); - fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" - "InnoDB: index %s table %s\n", index->name, - index->table->name); - fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); - rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur)); - fprintf(stderr, "InnoDB: record %s\n", err_buf); + rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur)); + fprintf(stderr, "InnoDB: record %s\n", err_buf); - fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); - fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); - - mem_free(err_buf); + fprintf(stderr, + "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + mem_free(err_buf); } else { btr_cur = btr_pcur_get_btr_cur(&pcur); diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 67a5925a3f5..3fa98db3a02 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -840,32 +840,31 @@ row_upd_sec_index_entry( rec = btr_cur_get_rec(btr_cur); if (!found) { + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); - err_buf = mem_alloc(1000); - 
dtuple_sprintf(err_buf, 900, entry); + fprintf(stderr, "InnoDB: error in sec index entry update in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); - fprintf(stderr, "InnoDB: error in sec index entry update in\n" - "InnoDB: index %s table %s\n", index->name, - index->table->name); - fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + rec_sprintf(err_buf, 900, rec); + fprintf(stderr, "InnoDB: record %s\n", err_buf); - rec_sprintf(err_buf, 900, rec); - fprintf(stderr, "InnoDB: record %s\n", err_buf); + fprintf(stderr, + "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); - fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); - fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); - - mem_free(err_buf); + mem_free(err_buf); } else { + /* Delete mark the old index record; it can already be + delete marked if we return after a lock wait in + row_ins_index_entry below */ - /* Delete mark the old index record; it can already be - delete marked if we return after a lock wait in - row_ins_index_entry below */ - - if (!rec_get_deleted_flag(rec)) { - err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, - &mtr); - } + if (!rec_get_deleted_flag(rec)) { + err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, + thr, &mtr); + } } btr_pcur_close(&pcur); @@ -907,7 +906,7 @@ row_upd_sec_step( || (node->state == UPD_NODE_UPDATE_SOME_SEC)); ut_ad(!(node->index->type & DICT_CLUSTERED)); - if ((node->state == UPD_NODE_UPDATE_ALL_SEC) + if (node->state == UPD_NODE_UPDATE_ALL_SEC || row_upd_changes_ord_field(node->row, node->index, node->update)) { err = row_upd_sec_index_entry(node, thr); @@ -933,15 +932,13 @@ row_upd_clust_rec_by_insert( dict_index_t* index, /* in: clustered index of the record */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* in: mtr; gets committed here */ -{ +{ + mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* 
btr_cur; trx_t* trx; dict_table_t* table; - mem_heap_t* heap; dtuple_t* entry; - ulint* ext_vec; - ulint n_ext_vec; ulint err; ut_ad(node); @@ -961,17 +958,20 @@ row_upd_clust_rec_by_insert( return(err); } + /* Mark as not-owned the externally stored fields which the new + row inherits from the delete marked record: purge should not + free those externally stored fields even if the delete marked + record is removed from the index tree, or updated. */ + + btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), + node->update, mtr); } mtr_commit(mtr); node->state = UPD_NODE_INSERT_CLUSTERED; - heap = mem_heap_create(1024); - - ext_vec = mem_heap_alloc(heap, - sizeof(ulint) * dtuple_get_n_fields(node->row)); - n_ext_vec = 0; + heap = mem_heap_create(500); entry = row_build_index_entry(node->row, index, heap); @@ -979,10 +979,23 @@ row_upd_clust_rec_by_insert( row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); + /* If we return from a lock wait, for example, we may have + extern fields marked as not-owned in entry (marked if the + if-branch above). We must unmark them. 
*/ + + btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec, + node->n_ext_vec); + /* We must mark non-updated extern fields in entry as inherited, + so that a possible rollback will not free them */ + + btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec, + node->n_ext_vec, + node->update); + err = row_ins_index_entry(index, entry, node->ext_vec, node->n_ext_vec, thr); - mem_heap_free(heap); - + mem_heap_free(heap); + return(err); } diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index 8dd9c9f3feb..ba556e1c050 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -111,7 +111,14 @@ ibool srv_print_buf_io = FALSE; ibool srv_print_log_io = FALSE; ibool srv_print_latch_waits = FALSE; +ulint srv_n_rows_inserted = 0; +ulint srv_n_rows_updated = 0; +ulint srv_n_rows_deleted = 0; +ulint srv_n_rows_read = 0; + ibool srv_print_innodb_monitor = FALSE; +ibool srv_print_innodb_lock_monitor = FALSE; +ibool srv_print_innodb_tablespace_monitor = FALSE; /* The parameters below are obsolete: */ @@ -137,6 +144,11 @@ ulint srv_test_n_reserved_rnds = ULINT_MAX; ulint srv_test_array_size = ULINT_MAX; ulint srv_test_n_mutexes = ULINT_MAX; +/* Array of English strings describing the current state of an +i/o handler thread */ + +char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; + /* IMPLEMENTATION OF THE SERVER MAIN PROGRAM ========================================= @@ -1926,23 +1938,25 @@ loop: } background_loop: - /* In this loop we run background operations while the server + /* In this loop we run background operations when the server is quiet */ current_time = time(NULL); - if (srv_print_innodb_monitor - && difftime(current_time, last_monitor_time) > 8) { + if (difftime(current_time, last_monitor_time) > 15) { + + last_monitor_time = time(NULL); + + if (srv_print_innodb_monitor) { - printf("================================\n"); - last_monitor_time = time(NULL); + printf("=====================================\n"); ut_print_timestamp(stdout); 
printf(" INNODB MONITOR OUTPUT\n" - "================================\n"); - printf("--------------------------\n" - "LOCKS HELD BY TRANSACTIONS\n" - "--------------------------\n"); + "=====================================\n"); + printf("------------\n" + "TRANSACTIONS\n" + "------------\n"); lock_print_info(); printf("-----------------------------------------------\n" "CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n" @@ -1955,11 +1969,40 @@ background_loop: "BUFFER POOL\n" "-----------\n"); buf_print_io(); + printf("--------------\n" + "ROW OPERATIONS\n" + "--------------\n"); + printf( + "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", + srv_n_rows_inserted, + srv_n_rows_updated, + srv_n_rows_deleted, + srv_n_rows_read); + printf("Server activity counter %lu\n", srv_activity_count); printf("----------------------------\n" "END OF INNODB MONITOR OUTPUT\n" "============================\n"); - } + } + + if (srv_print_innodb_tablespace_monitor) { + printf("================================================\n"); + + ut_print_timestamp(stdout); + + printf(" INNODB TABLESPACE MONITOR OUTPUT\n" + "================================================\n"); + + fsp_print(0); + fprintf(stderr, "Validating tablespace\n"); + fsp_validate(0); + fprintf(stderr, "Validation ok\n"); + printf("---------------------------------------\n" + "END OF INNODB TABLESPACE MONITOR OUTPUT\n" + "=======================================\n"); + } + } + mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { mutex_exit(&kernel_mutex); @@ -2009,8 +2052,18 @@ background_loop: } mutex_exit(&kernel_mutex); + if (srv_print_innodb_monitor) { + ut_print_timestamp(stdout); + printf(" InnoDB (main thread) starts buffer pool flush\n"); + } + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + if (srv_print_innodb_monitor) { + ut_print_timestamp(stdout); + printf(" InnoDB flushed %lu pages\n", n_pages_flushed); + } + mutex_enter(&kernel_mutex); if 
(srv_activity_count != old_activity_count) { mutex_exit(&kernel_mutex); @@ -2038,12 +2091,7 @@ background_loop: /* mem_print_new_info(); */ -/* - fsp_print(0); - fprintf(stderr, "Validating tablespace\n"); - fsp_validate(0); - fprintf(stderr, "Validation ok\n"); -*/ + #ifdef UNIV_SEARCH_PERF_STAT /* btr_search_print_info(); */ #endif diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index b3f5dbb28b5..c4002767226 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -67,8 +67,6 @@ os_file_t files[1000]; mutex_t ios_mutex; ulint ios; -#define SRV_MAX_N_IO_THREADS 1000 - ulint n[SRV_MAX_N_IO_THREADS + 5]; os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; @@ -591,6 +589,11 @@ innobase_start_or_create_for_mysql(void) return((int) err); } + /* Restrict the maximum number of file i/o threads */ + if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { + srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; + } + #if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO)) /* In simulated aio we currently have use only for 4 threads */ diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c index 48d043e1e04..4183f3f1c4c 100644 --- a/innobase/sync/sync0arr.c +++ b/innobase/sync/sync0arr.c @@ -438,22 +438,48 @@ sync_array_cell_print( /*==================*/ sync_cell_t* cell) /* in: sync cell */ { - char* str = NULL; - ulint type; + mutex_t* mutex; + rw_lock_t* rwlock; + char* str = NULL; + ulint type; type = cell->request_type; if (type == SYNC_MUTEX) { str = "MUTEX ENTER"; - } else if (type == RW_LOCK_EX) { - str = "X-LOCK"; - } else if (type == RW_LOCK_SHARED) { - str = "S-LOCK"; + mutex = (mutex_t*)cell->wait_object; + + printf("Mutex created in file %s line %lu", + mutex->cfile_name, mutex->cline); + } else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) { + + if (type == RW_LOCK_EX) { + str = "X-LOCK"; + } else { + str = "S_LOCK"; + } + + rwlock = (rw_lock_t*)cell->wait_object; + + printf("Rw-latch created in file %s line %lu", + 
rwlock->cfile_name, rwlock->cline); + if (rwlock->writer != RW_LOCK_NOT_LOCKED) { + printf(" writer reserved with %lu", rwlock->writer); + } + + if (rwlock->writer == RW_LOCK_EX) { + printf(" reserv. thread id %lu", + (ulint)rwlock->writer_thread); + } + + if (rwlock->reader_count > 0) { + printf(" readers %lu", rwlock->reader_count); + } } else { ut_error; } - printf("%lx waited for by thread %lu op. %s file %s line %lu ", + printf(" at addr %lx waited for by thread %lu op. %s file %s line %lu ", (ulint)cell->wait_object, (ulint)cell->thread, str, cell->file, cell->line); diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index 6b74c0d0d51..2adeb1cf57c 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -98,6 +98,8 @@ trx_rollback_for_mysql( return(DB_SUCCESS); } + + trx->op_info = "rollback"; /* Tell Innobase server that there might be work for utility threads: */ @@ -111,6 +113,8 @@ trx_rollback_for_mysql( srv_active_wake_master_thread(); + trx->op_info = ""; + return(err); } @@ -129,6 +133,8 @@ trx_rollback_last_sql_stat_for_mysql( return(DB_SUCCESS); } + + trx->op_info = "rollback of SQL statement"; /* Tell Innobase server that there might be work for utility threads: */ @@ -144,6 +150,8 @@ trx_rollback_last_sql_stat_for_mysql( srv_active_wake_master_thread(); + trx->op_info = ""; + return(err); } diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c index b056975d28a..0b8664013d7 100644 --- a/innobase/trx/trx0sys.c +++ b/innobase/trx/trx0sys.c @@ -510,7 +510,8 @@ trx_sys_init_at_db_start(void) MLOG_8BYTES, &mtr), TRX_SYS_TRX_ID_WRITE_MARGIN), 2 * TRX_SYS_TRX_ID_WRITE_MARGIN); - + + UT_LIST_INIT(trx_sys->mysql_trx_list); trx_lists_init_at_db_start(); if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index 14108c677eb..5d8c57edf34 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -24,6 +24,12 @@ Created 3/26/1996 Heikki Tuuri #include "thr0loc.h" 
#include "btr0sea.h" + +/* Copy of the prototype for innobase_mysql_print_thd: this + copy must be equal to the one in mysql/sql/ha_innobase.cc ! */ +void innobase_mysql_print_thd(void* thd); + + /* Dummy session used currently in MySQL interface */ sess_t* trx_dummy_sess = NULL; @@ -58,11 +64,15 @@ trx_create( trx = mem_alloc(sizeof(trx_t)); + trx->op_info = ""; + trx->type = TRX_USER; trx->conc_state = TRX_NOT_STARTED; trx->dict_operation = FALSE; + trx->mysql_thd = NULL; + trx->n_mysql_tables_in_use = 0; trx->mysql_n_tables_locked = 0; @@ -129,6 +139,8 @@ trx_allocate_for_mysql(void) trx_n_mysql_transactions++; + UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx); + mutex_exit(&kernel_mutex); trx->mysql_thread_id = os_thread_get_curr_id(); @@ -144,11 +156,11 @@ trx_search_latch_release_if_reserved( /*=================================*/ trx_t* trx) /* in: transaction */ { - if (trx->has_search_latch) { - rw_lock_s_unlock(&btr_search_latch); + if (trx->has_search_latch) { + rw_lock_s_unlock(&btr_search_latch); - trx->has_search_latch = FALSE; - } + trx->has_search_latch = FALSE; + } } /************************************************************************ @@ -209,6 +221,8 @@ trx_free_for_mysql( mutex_enter(&kernel_mutex); + UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); + trx_free(trx); ut_a(trx_n_mysql_transactions > 0); @@ -641,7 +655,7 @@ shortcut: ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); - UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); + UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); } /************************************************************************ @@ -1268,6 +1282,8 @@ trx_commit_for_mysql( sig to the transaction, we must here make sure that trx has been started. 
*/ + trx->op_info = "committing"; + trx_start_if_not_started(trx); mutex_enter(&kernel_mutex); @@ -1276,6 +1292,8 @@ trx_commit_for_mysql( mutex_exit(&kernel_mutex); + trx->op_info = ""; + return(0); } @@ -1295,3 +1313,78 @@ trx_mark_sql_stat_end( mutex_exit(&kernel_mutex); } + +/************************************************************************** +Marks the latest SQL statement ended but does not start a new transaction +if the trx is not started. */ + +void +trx_mark_sql_stat_end_do_not_start_new( +/*===================================*/ + trx_t* trx) /* in: trx handle */ +{ + mutex_enter(&kernel_mutex); + + trx->last_sql_stat_start.least_undo_no = trx->undo_no; + + mutex_exit(&kernel_mutex); +} + +/************************************************************************** +Prints info about a transaction to the standard output. The caller must +own the kernel mutex. */ + +void +trx_print( +/*======*/ + trx_t* trx) /* in: transaction */ +{ + printf("TRANSACTION %lu %lu, OS thread id %lu", + ut_dulint_get_high(trx->id), + ut_dulint_get_low(trx->id), + (ulint)trx->mysql_thread_id); + + if (ut_strlen(trx->op_info) > 0) { + printf(" %s", trx->op_info); + } + + if (trx->type != TRX_USER) { + printf(" purge trx"); + } + + switch (trx->conc_state) { + case TRX_NOT_STARTED: printf(", not started"); break; + case TRX_ACTIVE: printf(", active"); break; + case TRX_COMMITTED_IN_MEMORY: printf(", committed in memory"); + break; + default: printf(" state %lu", trx->conc_state); + } + + switch (trx->que_state) { + case TRX_QUE_RUNNING: printf(", runs or sleeps"); break; + case TRX_QUE_LOCK_WAIT: printf(", lock wait"); break; + case TRX_QUE_ROLLING_BACK: printf(", rolling back"); break; + case TRX_QUE_COMMITTING: printf(", committing"); break; + default: printf(" que state %lu", trx->que_state); + } + + if (0 < UT_LIST_GET_LEN(trx->trx_locks)) { + printf(", has %lu lock struct(s)", + UT_LIST_GET_LEN(trx->trx_locks)); + } + + if (trx->has_search_latch) { + printf(", holds 
adaptive hash latch"); + } + + if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) { + printf(", undo log entries %lu", + ut_dulint_get_low(trx->undo_no)); + } + + printf("\n"); + + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(trx->mysql_thd); + } +} diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc index 061371eb5d4..5f76ec39ce3 100644 --- a/sql/ha_innobase.cc +++ b/sql/ha_innobase.cc @@ -180,6 +180,47 @@ convert_error_code_to_mysql( } } +extern "C" { +/***************************************************************** +Prints info of a THD object (== user session thread) to the +standatd output. NOTE that mysql/innobase/trx/trx0trx.c must contain +the prototype for this function! */ + +void +innobase_mysql_print_thd( +/*=====================*/ + void* input_thd)/* in: pointer to a MySQL THD object */ +{ + THD* thd; + + thd = (THD*) input_thd; + + printf("MySQL thread id %lu, query id %lu", + thd->thread_id, thd->query_id); + if (thd->host) { + printf(" %s", thd->host); + } + + if (thd->ip) { + printf(" %s", thd->ip); + } + + if (thd->user) { + printf(" %s", thd->user); + } + + if (thd->proc_info) { + printf(" %s", thd->proc_info); + } + + if (thd->query) { + printf(" %0.100s", thd->query); + } + + printf("\n"); +} +} + /************************************************************************* Gets the InnoDB transaction handle for a MySQL handler object, creates an InnoDB transaction struct if the corresponding MySQL thread struct still @@ -199,6 +240,8 @@ check_trx_exists( dbug_assert(thd != NULL); trx = trx_allocate_for_mysql(); + trx->mysql_thd = thd; + thd->transaction.all.innobase_tid = trx; /* The execution of a single SQL statement is denoted by @@ -633,7 +676,7 @@ innobase_commit( if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { trx_commit_for_mysql(trx); - trx_mark_sql_stat_end(trx); + trx_mark_sql_stat_end_do_not_start_new(trx); } else { trx_mark_sql_stat_end(trx); } @@ -672,6 +715,7 @@ innobase_rollback( if (trx_handle != 
(void*)&innodb_dummy_stmt_trx_handle) { error = trx_rollback_for_mysql(trx); + trx_mark_sql_stat_end_do_not_start_new(trx); } else { error = trx_rollback_last_sql_stat_for_mysql(trx); trx_mark_sql_stat_end(trx); @@ -1334,8 +1378,15 @@ ha_innobase::write_row( autoincrement field */ auto_inc = table->next_number_field->val_int(); - if (auto_inc == 0) - auto_inc= user_thd->next_insert_id; + + /* In replication and also otherwise the auto-inc column + can be set with SET INSERT_ID. Then we must look at + user_thd->next_insert_id. If it is nonzero and the user + has not supplied a value, we must use it. */ + + if (auto_inc == 0 && user_thd->next_insert_id != 0) { + auto_inc = user_thd->next_insert_id; + } if (auto_inc != 0) { /* This call will calculate the max of the @@ -2221,29 +2272,29 @@ ha_innobase::external_lock( if (trx->n_mysql_tables_in_use == 0) { trx_mark_sql_stat_end(trx); } - thd->transaction.all.innodb_active_trans=1; + thd->transaction.all.innodb_active_trans = 1; trx->n_mysql_tables_in_use++; if (prebuilt->select_lock_type != LOCK_NONE) { - trx->mysql_n_tables_locked++; + trx->mysql_n_tables_locked++; } } else { trx->n_mysql_tables_in_use--; if (trx->n_mysql_tables_in_use == 0) { - trx->mysql_n_tables_locked = 0; + trx->mysql_n_tables_locked = 0; - if (trx->has_search_latch) { + if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } + trx_search_latch_release_if_reserved(trx); + } - if (!(thd->options - & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { - innobase_commit(thd, trx); - } + if (!(thd->options + & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { + innobase_commit(thd, trx); + } } } @@ -2639,6 +2690,10 @@ ha_innobase::records_in_range( DBUG_ENTER("records_in_range"); + if (prebuilt->trx) { + prebuilt->trx->op_info = "estimating range size"; + } + active_index = keynr; key = table->key_info + active_index; @@ -2671,6 +2726,10 @@ ha_innobase::records_in_range( my_free((char*) key_val_buff2, MYF(0)); + if (prebuilt->trx) { + 
prebuilt->trx->op_info = ""; + } + DBUG_RETURN((ha_rows) n_rows); } @@ -2690,10 +2749,15 @@ ha_innobase::estimate_number_of_rows(void) row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; dict_table_t* ib_table; + if (prebuilt->trx) { + prebuilt->trx->op_info = + "estimating upper bound of table size"; + } + DBUG_ENTER("info"); ib_table = prebuilt->table; - + dict_update_statistics(ib_table); data_file_length = ((ulonglong) @@ -2702,6 +2766,10 @@ ha_innobase::estimate_number_of_rows(void) /* The minimum clustered index record size is 20 bytes */ + if (prebuilt->trx) { + prebuilt->trx->op_info = ""; + } + return((ha_rows) (1000 + data_file_length / 20)); } @@ -2740,6 +2808,10 @@ ha_innobase::info( DBUG_ENTER("info"); + if (prebuilt->trx) { + prebuilt->trx->op_info = "calculating table stats"; + } + ib_table = prebuilt->table; if (flag & HA_STATUS_TIME) { @@ -2802,6 +2874,10 @@ ha_innobase::info( trx_get_error_info(prebuilt->trx)); } + if (prebuilt->trx) { + prebuilt->trx->op_info = ""; + } + DBUG_VOID_RETURN; }