From 31d63cd08f368e848ecdf01cb827bb3b956df1e2 Mon Sep 17 00:00:00 2001 From: marko Date: Thu, 30 Aug 2007 09:21:25 +0000 Subject: [PATCH] branches/zip: Merge 1664:1783 from trunk. --- handler/ha_innodb.cc | 326 ++++++++++++++++----------------------- ibuf/ibuf0ibuf.c | 5 - include/dict0mem.h | 20 ++- include/mach0data.h | 11 ++ include/mach0data.ic | 37 +++++ include/row0mysql.h | 13 +- include/sync0rw.h | 2 + include/sync0rw.ic | 2 +- include/trx0trx.h | 25 --- include/ut0mem.h | 2 +- lock/lock0lock.c | 7 + mysql-test/innodb.result | 1 + mysql-test/innodb.test | 3 + row/row0mysql.c | 16 +- row/row0sel.c | 25 +-- sync/sync0rw.c | 6 +- trx/trx0sys.c | 2 + trx/trx0trx.c | 18 --- 18 files changed, 244 insertions(+), 277 deletions(-) diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc index dc4f7c82d40..a3f2e6fca0d 100644 --- a/handler/ha_innodb.cc +++ b/handler/ha_innodb.cc @@ -60,12 +60,6 @@ static bool innodb_inited = 0; */ static handlerton *innodb_hton_ptr; -/* Store MySQL definition of 'byte': in Linux it is char while InnoDB -uses unsigned char; the header univ.i which we include next defines -'byte' as a macro which expands to 'unsigned char' */ - -typedef uchar mysql_byte; - /* Include necessary InnoDB headers */ extern "C" { #include "../storage/innobase/include/univ.i" @@ -97,11 +91,15 @@ extern "C" { #include "../storage/innobase/include/ha_prototypes.h" } +static const long AUTOINC_OLD_STYLE_LOCKING = 0; +static const long AUTOINC_NEW_STYLE_LOCKING = 1; +static const long AUTOINC_NO_LOCKING = 2; + static long innobase_mirrored_log_groups, innobase_log_files_in_group, innobase_log_buffer_size, innobase_additional_mem_pool_size, innobase_file_io_threads, innobase_lock_wait_timeout, innobase_force_recovery, - innobase_open_files; + innobase_open_files, innobase_autoinc_lock_mode; static long long innobase_buffer_pool_size, innobase_log_file_size; @@ -147,7 +145,7 @@ static HASH innobase_open_tables; bool nw_panic = FALSE; #endif -static mysql_byte* innobase_get_key(INNOBASE_SHARE *share, size_t *length, +static uchar* innobase_get_key(INNOBASE_SHARE *share, size_t *length, my_bool not_used __attribute__((unused))); static INNOBASE_SHARE *get_share(const char *table_name); static void free_share(INNOBASE_SHARE *share); @@ -1928,110 +1926,6 @@ retry: DBUG_RETURN(0); } -#if 0 -/* TODO: put the -MySQL-4.1 functionality back to 5.0. This is needed to get InnoDB Hot Backup -to work. */ - -/********************************************************************* -This is called when MySQL writes the binlog entry for the current -transaction. Writes to the InnoDB tablespace info which tells where the -MySQL binlog entry for the current transaction ended. Also commits the -transaction inside InnoDB but does NOT flush InnoDB log files to disk. -To flush you have to call innobase_commit_complete(). We have separated -flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled -InnoDB's group commit capability. */ -static -int -innobase_report_binlog_offset_and_commit( -/*=====================================*/ - /* out: 0 */ - handlerton *hton, /* in: Innodb handlerton */ - THD* thd, /* in: user thread */ - void* trx_handle, /* in: InnoDB trx handle */ - char* log_file_name, /* in: latest binlog file name */ - my_off_t end_offset) /* in: the offset in the binlog file - up to which we wrote */ -{ - trx_t* trx; - - trx = (trx_t*)trx_handle; - - ut_a(trx != NULL); - - trx->mysql_log_file_name = log_file_name; - trx->mysql_log_offset = (ib_longlong)end_offset; - - trx->flush_log_later = TRUE; - - innobase_commit(hton, thd, TRUE); - - trx->flush_log_later = FALSE; - - return(0); -} - -/*********************************************************************** -This function stores the binlog offset and flushes logs. */ -static -void -innobase_store_binlog_offset_and_flush_log( -/*=======================================*/ - char* binlog_name, /* in: binlog name */ - longlong offset) /* in: binlog offset */ -{ - mtr_t mtr; - - assert(binlog_name != NULL); - - /* Start a mini-transaction */ - mtr_start(&mtr); - - /* Update the latest MySQL binlog name and offset info - in trx sys header */ - - trx_sys_update_mysql_binlog_offset( - binlog_name, - offset, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - - /* Commits the mini-transaction */ - mtr_commit(&mtr); - - /* Synchronous flush of the log buffer to disk */ - log_buffer_flush_to_disk(); -} - -/********************************************************************* -This is called after MySQL has written the binlog entry for the current -transaction. Flushes the InnoDB log files to disk if required. */ -static -int -innobase_commit_complete( -/*=====================*/ - /* out: 0 */ - THD* thd) /* in: user thread */ -{ - trx_t* trx; - - trx = thd_to_trx(thd); - - if (trx && trx->active_trans) { - - trx->active_trans = 0; - - if (UNIV_UNLIKELY(srv_flush_log_at_trx_commit == 0)) { - - return(0); - } - - trx_commit_complete_for_mysql(trx); - } - - return(0); -} -#endif - /********************************************************************* Rolls back a transaction or the latest SQL statement. */ static @@ -2411,7 +2305,7 @@ ha_innobase::open( upd_and_key_val_buff_len = table->s->reclength + table->s->max_key_length + MAX_REF_PARTS * 3; - if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME), + if (!(uchar*) my_multi_malloc(MYF(MY_WME), &upd_buff, upd_and_key_val_buff_len, &key_val_buff, upd_and_key_val_buff_len, NullS)) { @@ -2845,8 +2739,8 @@ inline uint innobase_read_from_2_little_endian( /*===============================*/ - /* out: value */ - const mysql_byte* buf) /* in: from where to read */ + /* out: value */ + const uchar* buf) /* in: from where to read */ { return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))); } @@ -2862,7 +2756,7 @@ ha_innobase::store_key_val_for_row( char* buff, /* in/out: buffer for the key value (in MySQL format) */ uint buff_len,/* in: buffer length */ - const mysql_byte* record)/* in: row in MySQL format */ + const uchar* record)/* in: row in MySQL format */ { KEY* key_info = table->key_info + keynr; KEY_PART_INFO* key_part = key_info->key_part; @@ -3059,7 +2953,7 @@ ha_innobase::store_key_val_for_row( CHARSET_INFO* cs; ulint true_len; ulint key_len; - const mysql_byte* src_start; + const uchar* src_start; int error=0; enum_field_types real_type; @@ -3352,24 +3246,46 @@ ha_innobase::innobase_autoinc_lock(void) { ulint error = DB_SUCCESS; - if (thd_sql_command(user_thd) == SQLCOM_INSERT) { + switch (innobase_autoinc_lock_mode) { + case AUTOINC_NO_LOCKING: + /* Acquire only the AUTOINC mutex. */ dict_table_autoinc_lock(prebuilt->table); + break; - /* We peek at the dict_table_t::auto_inc_lock to check if - another statement has locked it */ - if (prebuilt->trx->auto_inc_lock != NULL) { - /* Release the mutex to avoid deadlocks */ - dict_table_autoinc_unlock(prebuilt->table); + case AUTOINC_NEW_STYLE_LOCKING: + /* For simple (single/multi) row INSERTs, we fallback to the + old style only if another transaction has already acquired + the AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT + etc. type of statement. */ + if (thd_sql_command(user_thd) == SQLCOM_INSERT) { + dict_table_t* table = prebuilt->table; - goto acquire_auto_inc_lock; + /* Acquire the AUTOINC mutex. */ + dict_table_autoinc_lock(table); + + /* We need to check that another transaction isn't + already holding the AUTOINC lock on the table. */ + if (table->n_waiting_or_granted_auto_inc_locks) { + /* Release the mutex to avoid deadlocks. */ + dict_table_autoinc_unlock(table); + } else { + break; + } } - } else { -acquire_auto_inc_lock: + /* Fall through to old style locking. */ + + case AUTOINC_OLD_STYLE_LOCKING: error = row_lock_table_autoinc_for_mysql(prebuilt); if (error == DB_SUCCESS) { + + /* Acquire the AUTOINC mutex. */ dict_table_autoinc_lock(prebuilt->table); } + break; + + default: + ut_error; } return(ulong(error)); @@ -3431,8 +3347,8 @@ handle. */ int ha_innobase::write_row( /*===================*/ - /* out: error code */ - mysql_byte* record) /* in: a row in MySQL format */ + /* out: error code */ + uchar* record) /* in: a row in MySQL format */ { int error = 0; ibool auto_inc_used= FALSE; @@ -3608,6 +3524,7 @@ no_commit: if (auto_inc > prebuilt->last_value) { set_max_autoinc: + ut_a(prebuilt->table->autoinc_increment > 0); auto_inc += prebuilt->table->autoinc_increment; innobase_set_max_autoinc(auto_inc); @@ -3635,16 +3552,16 @@ calc_row_difference( /*================*/ /* out: error number or 0 */ upd_t* uvect, /* in/out: update vector */ - mysql_byte* old_row, /* in: old row in MySQL format */ - mysql_byte* new_row, /* in: new row in MySQL format */ + uchar* old_row, /* in: old row in MySQL format */ + uchar* new_row, /* in: new row in MySQL format */ struct st_table* table, /* in: table in MySQL data dictionary */ - mysql_byte* upd_buff, /* in: buffer to use */ + uchar* upd_buff, /* in: buffer to use */ ulint buff_len, /* in: buffer length */ row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */ THD* thd) /* in: user thread */ { - mysql_byte* original_upd_buff = upd_buff; + uchar* original_upd_buff = upd_buff; Field* field; enum_field_types field_mysql_type; uint n_fields; @@ -3785,8 +3702,8 @@ int ha_innobase::update_row( /*====================*/ /* out: error number or 0 */ - const mysql_byte* old_row,/* in: old row in MySQL format */ - mysql_byte* new_row)/* in: new row in MySQL format */ + const uchar* old_row, /* in: old row in MySQL format */ + uchar* new_row) /* in: new row in MySQL format */ { upd_t* uvect; int error = 0; @@ -3808,7 +3725,7 @@ ha_innobase::update_row( /* Build an update vector from the modified fields in the rows (uses upd_buff of the handle) */ - calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table, + calc_row_difference(uvect, (uchar*) old_row, new_row, table, upd_buff, (ulint)upd_and_key_val_buff_len, prebuilt, user_thd); @@ -3865,8 +3782,8 @@ Deletes a row given as the parameter. */ int ha_innobase::delete_row( /*====================*/ - /* out: error number or 0 */ - const mysql_byte* record) /* in: a row in MySQL format */ + /* out: error number or 0 */ + const uchar* record) /* in: a row in MySQL format */ { int error = 0; trx_t* trx = thd_to_trx(user_thd); @@ -3879,12 +3796,23 @@ ha_innobase::delete_row( if (table->found_next_number_field && record == table->record[0]) { ulonglong dummy = 0; - error = innobase_get_auto_increment(&dummy); + /* First check whether the AUTOINC sub-system has been + initialized using the AUTOINC mutex. If not then we + do it the "proper" way, by acquiring the heavier locks. */ + dict_table_autoinc_lock(prebuilt->table); - if (error == DB_SUCCESS) { + if (!prebuilt->table->autoinc_inited) { + dict_table_autoinc_unlock(prebuilt->table); + + error = innobase_get_auto_increment(&dummy); + + if (error == DB_SUCCESS) { + dict_table_autoinc_unlock(prebuilt->table); + } else { + goto error_exit; + } + } else { dict_table_autoinc_unlock(prebuilt->table); - } else { - goto error_exit; } } @@ -4133,9 +4061,9 @@ ha_innobase::index_read( /*====================*/ /* out: 0, HA_ERR_KEY_NOT_FOUND, or error number */ - mysql_byte* buf, /* in/out: buffer for the returned + uchar* buf, /* in/out: buffer for the returned row */ - const mysql_byte* key_ptr,/* in: key value; if this is NULL + const uchar* key_ptr, /* in: key value; if this is NULL we position the cursor at the start or end of index; this can also contain an InnoDB row id, in @@ -4235,13 +4163,13 @@ row with the current key value or prefix. */ int ha_innobase::index_read_last( /*=========================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, or an - error code */ - mysql_byte* buf, /* out: fetched row */ - const mysql_byte* key_ptr, /* in: key value, or a prefix of a full - key value */ - uint key_len) /* in: length of the key val or prefix - in bytes */ + /* out: 0, HA_ERR_KEY_NOT_FOUND, or an + error code */ + uchar* buf, /* out: fetched row */ + const uchar* key_ptr,/* in: key value, or a prefix of a full + key value */ + uint key_len)/* in: length of the key val or prefix + in bytes */ { return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST)); } @@ -4339,10 +4267,10 @@ int ha_innobase::index_read_idx( /*========================*/ /* out: error number or 0 */ - mysql_byte* buf, /* in/out: buffer for the returned + uchar* buf, /* in/out: buffer for the returned row */ uint keynr, /* in: use this index */ - const mysql_byte* key, /* in: key value; if this is NULL + const uchar* key, /* in: key value; if this is NULL we position the cursor at the start or end of index */ uint key_len, /* in: key value length */ @@ -4365,7 +4293,7 @@ ha_innobase::general_fetch( /*=======================*/ /* out: 0, HA_ERR_END_OF_FILE, or error number */ - mysql_byte* buf, /* in/out: buffer for next row in MySQL + uchar* buf, /* in/out: buffer for next row in MySQL format */ uint direction, /* in: ROW_SEL_NEXT or ROW_SEL_PREV */ uint match_mode) /* in: 0, ROW_SEL_EXACT, or @@ -4413,7 +4341,7 @@ ha_innobase::index_next( /*====================*/ /* out: 0, HA_ERR_END_OF_FILE, or error number */ - mysql_byte* buf) /* in/out: buffer for next row in MySQL + uchar* buf) /* in/out: buffer for next row in MySQL format */ { ha_statistic_increment(&SSV::ha_read_next_count); @@ -4429,8 +4357,8 @@ ha_innobase::index_next_same( /*=========================*/ /* out: 0, HA_ERR_END_OF_FILE, or error number */ - mysql_byte* buf, /* in/out: buffer for the row */ - const mysql_byte* key, /* in: key value */ + uchar* buf, /* in/out: buffer for the row */ + const uchar* key, /* in: key value */ uint keylen) /* in: key value length */ { ha_statistic_increment(&SSV::ha_read_next_count); @@ -4445,10 +4373,8 @@ positioned using index_read. */ int ha_innobase::index_prev( /*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - mysql_byte* buf) /* in/out: buffer for previous row in MySQL - format */ + /* out: 0, HA_ERR_END_OF_FILE, or error number */ + uchar* buf) /* in/out: buffer for previous row in MySQL format */ { ha_statistic_increment(&SSV::ha_read_prev_count); @@ -4462,9 +4388,8 @@ corresponding row to buf. */ int ha_innobase::index_first( /*=====================*/ - /* out: 0, HA_ERR_END_OF_FILE, - or error code */ - mysql_byte* buf) /* in/out: buffer for the row */ + /* out: 0, HA_ERR_END_OF_FILE, or error code */ + uchar* buf) /* in/out: buffer for the row */ { int error; @@ -4489,8 +4414,8 @@ corresponding row to buf. */ int ha_innobase::index_last( /*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error code */ - mysql_byte* buf) /* in/out: buffer for the row */ + /* out: 0, HA_ERR_END_OF_FILE, or error code */ + uchar* buf) /* in/out: buffer for the row */ { int error; @@ -4559,7 +4484,7 @@ int ha_innobase::rnd_next( /*==================*/ /* out: 0, HA_ERR_END_OF_FILE, or error number */ - mysql_byte* buf)/* in/out: returns the row in this buffer, + uchar* buf) /* in/out: returns the row in this buffer, in MySQL format */ { int error; @@ -4588,14 +4513,12 @@ Fetches a row from the table based on a row reference. */ int ha_innobase::rnd_pos( /*=================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, - or error code */ - mysql_byte* buf, /* in/out: buffer for the row */ - mysql_byte* pos) /* in: primary key value of the row in the - MySQL format, or the row id if the clustered - index was internally generated by InnoDB; - the length of data in pos has to be - ref_length */ + /* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */ + uchar* buf, /* in/out: buffer for the row */ + uchar* pos) /* in: primary key value of the row in the + MySQL format, or the row id if the clustered + index was internally generated by InnoDB; the + length of data in pos has to be ref_length */ { int error; uint keynr = active_index; @@ -4648,7 +4571,7 @@ was positioned the last time. */ void ha_innobase::position( /*==================*/ - const mysql_byte* record) /* in: row in MySQL format */ + const uchar* record) /* in: row in MySQL format */ { uint len; @@ -5561,7 +5484,7 @@ ha_innobase::records_in_range( { KEY* key; dict_index_t* index; - mysql_byte* key_val_buff2 = (mysql_byte*) my_malloc( + uchar* key_val_buff2 = (uchar*) my_malloc( table->s->reclength + table->s->max_key_length + 100, MYF(MY_FAE)); @@ -5606,7 +5529,7 @@ ha_innobase::records_in_range( (ulint)upd_and_key_val_buff_len, index, (byte*) (min_key ? min_key->key : - (const mysql_byte*) 0), + (const uchar*) 0), (ulint) (min_key ? min_key->length : 0), prebuilt->trx); @@ -5614,7 +5537,7 @@ ha_innobase::records_in_range( range_end, (byte*) key_val_buff2, buff2_len, index, (byte*) (max_key ? max_key->key : - (const mysql_byte*) 0), + (const uchar*) 0), (ulint) (max_key ? max_key->length : 0), prebuilt->trx); @@ -7024,12 +6947,12 @@ bool innobase_show_status(handlerton *hton, THD* thd, locking. ****************************************************************************/ -static mysql_byte* innobase_get_key(INNOBASE_SHARE* share, size_t *length, +static uchar* innobase_get_key(INNOBASE_SHARE* share, size_t *length, my_bool not_used __attribute__((unused))) { *length=share->table_name_length; - return((mysql_byte*) share->table_name); + return (uchar*) share->table_name; } static INNOBASE_SHARE* get_share(const char* table_name) @@ -7039,7 +6962,7 @@ static INNOBASE_SHARE* get_share(const char* table_name) uint length=(uint) strlen(table_name); if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables, - (mysql_byte*) table_name, + (uchar*) table_name, length))) { share = (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1, @@ -7050,7 +6973,7 @@ static INNOBASE_SHARE* get_share(const char* table_name) strmov(share->table_name,table_name); if (my_hash_insert(&innobase_open_tables, - (mysql_byte*) share)) { + (uchar*) share)) { pthread_mutex_unlock(&innobase_share_mutex); my_free(share,0); @@ -7072,7 +6995,7 @@ static void free_share(INNOBASE_SHARE* share) pthread_mutex_lock(&innobase_share_mutex); if (!--share->use_count) { - hash_delete(&innobase_open_tables, (mysql_byte*) share); + hash_delete(&innobase_open_tables, (uchar*) share); thr_lock_delete(&share->lock); pthread_mutex_destroy(&share->mutex); my_free(share, MYF(0)); @@ -7512,13 +7435,24 @@ ha_innobase::get_auto_increment( *nb_reserved_values = prebuilt->trx->n_autoinc_rows; - /* Compute the last value in the interval */ - prebuilt->last_value = *first_value + (*nb_reserved_values * increment); + /* With old style AUTOINC locking we only update the table's + AUTOINC counter after attempting to insert the row. */ + if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) { - ut_a(prebuilt->last_value >= *first_value); + /* Compute the last value in the interval */ + prebuilt->last_value = *first_value + + (*nb_reserved_values * increment); - /* Update the table autoinc variable */ - dict_table_autoinc_update(prebuilt->table, prebuilt->last_value); + ut_a(prebuilt->last_value >= *first_value); + + /* Update the table autoinc variable */ + dict_table_autoinc_update( + prebuilt->table, prebuilt->last_value); + } else { + /* This will force write_row() into attempting an update + of the table's AUTOINC counter. */ + prebuilt->last_value = 0; + } /* The increment to be used to increase the AUTOINC value, we use this in write_row() and update_row() to increase the autoinc counter @@ -7573,9 +7507,9 @@ ha_innobase::cmp_ref( /*=================*/ /* out: < 0 if ref1 < ref2, 0 if equal, else > 0 */ - const mysql_byte* ref1, /* in: an (internal) primary key value in the + const uchar* ref1, /* in: an (internal) primary key value in the MySQL key value format */ - const mysql_byte* ref2) /* in: an (internal) primary key value in the + const uchar* ref2) /* in: an (internal) primary key value in the MySQL key value format */ { enum_field_types mysql_type; @@ -9247,6 +9181,17 @@ static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path, "Path to individual files and their sizes.", NULL, NULL, NULL); +static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, + PLUGIN_VAR_RQCMDARG, + "The AUTOINC lock modes supported by InnoDB:\n" + " 0 => Old style AUTOINC locking (for backward compatibility)\n" + " 1 => New style AUTOINC locking\n" + " 2 => No AUTOINC locking (unsafe for SBR)", + NULL, NULL, + AUTOINC_NEW_STYLE_LOCKING, /* Default setting */ + AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ + AUTOINC_NO_LOCKING, 0); /* Maximum value */ + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), @@ -9285,6 +9230,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(table_locks), MYSQL_SYSVAR(thread_concurrency), MYSQL_SYSVAR(thread_sleep_delay), + MYSQL_SYSVAR(autoinc_lock_mode), NULL }; diff --git a/ibuf/ibuf0ibuf.c b/ibuf/ibuf0ibuf.c index 8e4b258656d..c2b6e5522e2 100644 --- a/ibuf/ibuf0ibuf.c +++ b/ibuf/ibuf0ibuf.c @@ -1189,11 +1189,6 @@ ibuf_dummy_index_free( dict_mem_table_free(table); } -void -dict_index_print_low( -/*=================*/ - dict_index_t* index); /* in: index */ - /************************************************************************* Builds the entry to insert into a non-clustered index when we have the corresponding record in an ibuf index. */ diff --git a/include/dict0mem.h b/include/dict0mem.h index bd73a3db480..55cd03cb79a 100644 --- a/include/dict0mem.h +++ b/include/dict0mem.h @@ -340,11 +340,11 @@ struct dict_table_struct{ unsigned n_cols:10;/* number of columns */ dict_col_t* cols; /* array of column descriptions */ const char* col_names; - /* n_def column names packed in an - "name1\0name2\0...nameN\0" array. until - n_def reaches n_cols, this is allocated with - ut_malloc, and the final size array is - allocated through the table's heap. */ + /* Column names packed in a character string + "name1\0name2\0...nameN\0". Until + the string contains n_cols, it will be + allocated from a temporary heap. The final + string will be allocated from table->heap. */ hash_node_t name_hash; /* hash chain node */ hash_node_t id_hash; /* hash chain node */ UT_LIST_BASE_NODE_T(dict_index_t) @@ -444,6 +444,16 @@ struct dict_table_struct{ UT_LIST_BASE_NODE_T(row_prebuilt_t) prebuilts; /* base node for the prebuilts defined for the table */ + ulong n_waiting_or_granted_auto_inc_locks; + /* This counter is used to track the number + of granted and pending autoinc locks on this + table. This value is set after acquiring the + kernel mutex but we peek the contents to + determine whether other transactions have + acquired the AUTOINC lock or not. Of course + only one transaction can be granted the + lock but there can be multiple waiters. */ + #ifdef UNIV_DEBUG ulint magic_n;/* magic number */ # define DICT_TABLE_MAGIC_N 76333786 diff --git a/include/mach0data.h b/include/mach0data.h index aeba3260c14..aae8224080b 100644 --- a/include/mach0data.h +++ b/include/mach0data.h @@ -364,6 +364,17 @@ mach_write_to_2_little_endian( byte* dest, /* in: where to write */ ulint n); /* in: unsigned long int to write */ +/************************************************************* +Convert integral type from storage byte order (big endian) to +host byte order. */ +UNIV_INLINE +void +mach_read_int_type( +/*===============*/ + byte* dest, /* out: where to write */ + const byte* src, /* in: where to read from */ + ulint len, /* in: length of src */ + ibool unsigned_type); /* in: signed or unsigned flag */ #ifndef UNIV_NONINL #include "mach0data.ic" #endif diff --git a/include/mach0data.ic b/include/mach0data.ic index 4967c80846f..8b2e1fb337b 100644 --- a/include/mach0data.ic +++ b/include/mach0data.ic @@ -7,6 +7,8 @@ to the machine format. Created 11/28/1995 Heikki Tuuri ***********************************************************************/ +#include "ut0mem.h" + /*********************************************************** The following function is used to store data in one byte. */ UNIV_INLINE @@ -723,3 +725,38 @@ mach_write_to_2_little_endian( *dest = (byte)(n & 0xFFUL); } + +/************************************************************* +Convert integral type from storage byte order (big endian) to +host byte order. */ +UNIV_INLINE +void +mach_read_int_type( +/*===============*/ + byte* dest, /* out: where to write */ + const byte* src, /* in: where to read from */ + ulint len, /* in: length of src */ + ibool unsigned_type) /* in: signed or unsigned flag */ +{ +#ifdef WORDS_BIGENDIAN + memcpy(dest, src, len); + + if (!unsigned_type) { + dest[0] ^= 128; + } +#else + byte* ptr; + + /* Convert integer data from Innobase to a little-endian format, + sign bit restored to normal. */ + + for (ptr = dest + len; ptr != dest; ++src) { + --ptr; + *ptr = *src; + } + + if (!unsigned_type) { + dest[len - 1] ^= 128; + } +#endif +} diff --git a/include/row0mysql.h b/include/row0mysql.h index 1ed27314aec..bf1e04fb4ed 100644 --- a/include/row0mysql.h +++ b/include/row0mysql.h @@ -340,9 +340,11 @@ row_mysql_unfreeze_data_dictionary( trx_t* trx); /* in: transaction */ #ifndef UNIV_HOTBACKUP /************************************************************************* -Does a table creation operation for MySQL. If the name of the created -table ends to characters INNODB_MONITOR, then this also starts -printing of monitor output by the master thread. */ +Drops a table for MySQL. If the name of the table ends in +one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", +"innodb_table_monitor", then this will also start the printing of monitor +output by the master thread. If the table name ends in "innodb_mem_validate", +InnoDB will try to invoke mem_validate(). */ int row_create_table_for_mysql( @@ -420,8 +422,9 @@ row_truncate_table_for_mysql( dict_table_t* table, /* in: table handle */ trx_t* trx); /* in: transaction handle */ /************************************************************************* -Drops a table for MySQL. If the name of the dropped table ends to -characters INNODB_MONITOR, then this also stops printing of monitor +Drops a table for MySQL. If the name of the dropped table ends in +one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", +"innodb_table_monitor", then this will also stop the printing of monitor output by the master thread. */ int diff --git a/include/sync0rw.h b/include/sync0rw.h index abf04253141..7d2f63803d0 100644 --- a/include/sync0rw.h +++ b/include/sync0rw.h @@ -101,6 +101,7 @@ void rw_lock_free( /*=========*/ rw_lock_t* lock); /* in: rw-lock */ +#ifdef UNIV_DEBUG /********************************************************************** Checks that the rw-lock has been initialized and that there are no simultaneous shared and exclusive locks. */ @@ -109,6 +110,7 @@ ibool rw_lock_validate( /*=============*/ rw_lock_t* lock); +#endif /* UNIV_DEBUG */ /****************************************************************** NOTE! The following macros should be used in rw s-locking, not the corresponding function. */ diff --git a/include/sync0rw.ic b/include/sync0rw.ic index f8b5367739a..b41593d0a96 100644 --- a/include/sync0rw.ic +++ b/include/sync0rw.ic @@ -231,7 +231,7 @@ rw_lock_s_lock_func( owns an s-lock here, it may end up in a deadlock with another thread which requests an x-lock here. Therefore, we will forbid recursive s-locking of a latch: the following assert will warn the programmer - of the possibility of a tjis kind of deadlock. If we want to implement + of the possibility of this kind of a deadlock. If we want to implement safe recursive s-locking, we should keep in a list the thread ids of the threads which have s-locked a latch. This would use some CPU time. */ diff --git a/include/trx0trx.h b/include/trx0trx.h index 73658300afc..c03fa04364c 100644 --- a/include/trx0trx.h +++ b/include/trx0trx.h @@ -510,31 +510,6 @@ struct trx_struct{ ib_longlong mysql_log_offset;/* if MySQL binlog is used, this field contains the end offset of the binlog entry */ - const char* mysql_master_log_file_name; - /* if the database server is a MySQL - replication slave, we have here the - master binlog name up to which - replication has processed; otherwise - this is a pointer to a null - character */ - ib_longlong mysql_master_log_pos; - /* if the database server is a MySQL - replication slave, this is the - position in the log file up to which - replication has processed */ - /* A MySQL variable mysql_thd->synchronous_repl tells if we have - to use synchronous replication. See ha_innodb.cc. */ - char* repl_wait_binlog_name;/* NULL, or if synchronous MySQL - replication is used, the binlog name - up to which we must communicate the - binlog to the slave, before returning - from a commit; this is the same as - mysql_log_file_name, but we allocate - and copy the name to a separate buffer - here */ - ib_longlong repl_wait_binlog_pos;/* see above at - repl_wait_binlog_name */ - os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated with this transaction object */ ulint mysql_process_no;/* since in Linux, 'top' reports diff --git a/include/ut0mem.h b/include/ut0mem.h index d8478a43234..331d62a6264 100644 --- a/include/ut0mem.h +++ b/include/ut0mem.h @@ -63,7 +63,7 @@ ut_test_malloc( /* out: TRUE if succeeded */ ulint n); /* in: try to allocate this many bytes */ /************************************************************************** -Frees a memory bloock allocated with ut_malloc. */ +Frees a memory block allocated with ut_malloc. */ void ut_free( diff --git a/lock/lock0lock.c b/lock/lock0lock.c index 7ebe7000029..9563983f09f 100644 --- a/lock/lock0lock.c +++ b/lock/lock0lock.c @@ -3544,6 +3544,10 @@ lock_table_create( ut_ad(table && trx); ut_ad(mutex_own(&kernel_mutex)); + if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) { + ++table->n_waiting_or_granted_auto_inc_locks; + } + if (type_mode == LOCK_AUTO_INC) { /* Only one trx can have the lock on the table at a time: we may use the memory preallocated @@ -3594,6 +3598,9 @@ lock_table_remove_low( if (lock == trx->auto_inc_lock) { trx->auto_inc_lock = NULL; + + ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); + --table->n_waiting_or_granted_auto_inc_locks; } UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock); diff --git a/mysql-test/innodb.result b/mysql-test/innodb.result index c29fe55a934..f80ccf600c5 100644 --- a/mysql-test/innodb.result +++ b/mysql-test/innodb.result @@ -479,6 +479,7 @@ passwd varchar(32) binary DEFAULT '' NOT NULL, PRIMARY KEY (id), UNIQUE ggid (ggid) ) ENGINE=innodb; +set global innodb_autoinc_lock_mode = 0; insert into t1 (ggid,passwd) values ('test1','xxx'); insert into t1 (ggid,passwd) values ('test2','yyy'); insert into t1 (ggid,passwd) values ('test2','this will fail'); diff --git a/mysql-test/innodb.test b/mysql-test/innodb.test index c806bc8593a..10fb3d6fbf1 100644 --- a/mysql-test/innodb.test +++ b/mysql-test/innodb.test @@ -345,6 +345,9 @@ CREATE TABLE t1 ( UNIQUE ggid (ggid) ) ENGINE=innodb; +# Set to old style locking +set global innodb_autoinc_lock_mode = 0; + insert into t1 (ggid,passwd) values ('test1','xxx'); insert into t1 (ggid,passwd) values ('test2','yyy'); -- error ER_DUP_ENTRY diff --git a/row/row0mysql.c b/row/row0mysql.c index 689075249e2..cba48295e3e 100644 --- a/row/row0mysql.c +++ b/row/row0mysql.c @@ -1793,10 +1793,11 @@ row_mysql_unlock_data_dictionary( #ifndef UNIV_HOTBACKUP /************************************************************************* -Does a table creation operation for MySQL. If the name of the table -to be created is equal with one of the predefined magic table names, -then this also starts printing the corresponding monitor output by -the master thread. */ +Drops a table for MySQL. If the name of the table ends in +one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", +"innodb_table_monitor", then this will also start the printing of monitor +output by the master thread. If the table name ends in "innodb_mem_validate", +InnoDB will try to invoke mem_validate(). */ int row_create_table_for_mysql( @@ -3032,9 +3033,10 @@ funct_exit: } /************************************************************************* -Drops a table for MySQL. If the name of the table to be dropped is equal -with one of the predefined magic table names, then this also stops printing -the corresponding monitor output by the master thread. */ +Drops a table for MySQL. If the name of the dropped table ends in +one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", +"innodb_table_monitor", then this will also stop the printing of monitor +output by the master thread. */ int row_drop_table_for_mysql( diff --git a/row/row0sel.c b/row/row0sel.c index d4c967f76f4..d231b48a1b1 100644 --- a/row/row0sel.c +++ b/row/row0sel.c @@ -4591,10 +4591,10 @@ row_search_autoinc_read_column( ibool unsigned_type) /* in: signed or unsigned flag */ { ulint len; - byte* ptr; const byte* data; ib_longlong value; mem_heap_t* heap = NULL; + /* Our requirement is that dest should be word aligned. */ byte dest[sizeof(value)]; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; @@ -4613,34 +4613,25 @@ row_search_autoinc_read_column( ut_a(len != UNIV_SQL_NULL); ut_a(len <= sizeof value); - /* Convert integer data from Innobase to a little-endian format, - sign bit restored to normal */ + mach_read_int_type(dest, data, len, unsigned_type); - for (ptr = dest + len; ptr != dest; ++data) { - --ptr; - *ptr = *data; - } - - if (!unsigned_type) { - dest[len - 1] ^= 128; - } - - /* The assumption here is that the AUTOINC value can't be negative.*/ + /* The assumption here is that the AUTOINC value can't be negative + and that dest is word aligned. */ switch (len) { case 8: - value = *(ib_longlong*) ptr; + value = *(ib_longlong*) dest; break; case 4: - value = *(ib_uint32_t*) ptr; + value = *(ib_uint32_t*) dest; break; case 2: - value = *(uint16 *) ptr; + value = *(uint16 *) dest; break; case 1: - value = *ptr; + value = *dest; break; default: diff --git a/sync/sync0rw.c b/sync/sync0rw.c index 4db780c8b3f..8dbc69816e9 100644 --- a/sync/sync0rw.c +++ b/sync/sync0rw.c @@ -174,9 +174,7 @@ rw_lock_free( /*=========*/ rw_lock_t* lock) /* in: rw-lock */ { -#ifdef UNIV_DEBUG - ut_a(rw_lock_validate(lock)); -#endif /* UNIV_DEBUG */ + ut_ad(rw_lock_validate(lock)); ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); ut_a(rw_lock_get_waiters(lock) == 0); ut_a(rw_lock_get_reader_count(lock) == 0); @@ -199,6 +197,7 @@ rw_lock_free( mutex_exit(&rw_lock_list_mutex); } +#ifdef UNIV_DEBUG /********************************************************************** Checks that the rw-lock has been initialized and that there are no simultaneous shared and exclusive locks. */ @@ -226,6 +225,7 @@ rw_lock_validate( return(TRUE); } +#endif /* UNIV_DEBUG */ /********************************************************************** Lock an rw-lock in shared mode for the current thread. If the rw-lock is diff --git a/trx/trx0sys.c b/trx/trx0sys.c index c8ed86d77e3..0aec1e42b5d 100644 --- a/trx/trx0sys.c +++ b/trx/trx0sys.c @@ -656,6 +656,7 @@ trx_sys_update_mysql_binlog_offset( MLOG_4BYTES, mtr); } +#ifdef UNIV_HOTBACKUP /********************************************************************* Prints to stderr the MySQL binlog info in the system header if the magic number shows it valid. */ @@ -688,6 +689,7 @@ trx_sys_print_mysql_binlog_offset_from_page( + TRX_SYS_MYSQL_LOG_NAME); } } +#endif /* UNIV_HOTBACKUP */ /********************************************************************* Stores the MySQL binlog offset info in the trx system header if diff --git a/trx/trx0trx.c b/trx/trx0trx.c index 7459286e1d1..c73946f8c78 100644 --- a/trx/trx0trx.c +++ b/trx/trx0trx.c @@ -112,11 +112,6 @@ trx_create( trx->mysql_log_file_name = NULL; trx->mysql_log_offset = 0; - trx->mysql_master_log_file_name = ""; - trx->mysql_master_log_pos = 0; - - trx->repl_wait_binlog_name = NULL; - trx->repl_wait_binlog_pos = 0; mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO); @@ -287,11 +282,6 @@ trx_free( trx_undo_arr_free(trx->undo_no_arr); } - if (trx->repl_wait_binlog_name != NULL) { - - mem_free(trx->repl_wait_binlog_name); - } - ut_a(UT_LIST_GET_LEN(trx->signals) == 0); ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0); @@ -774,14 +764,6 @@ trx_commit_off_kernel( trx->mysql_log_file_name = NULL; } - if (trx->mysql_master_log_file_name[0] != '\0') { - /* This database server is a MySQL replication slave */ - trx_sys_update_mysql_binlog_offset( - trx->mysql_master_log_file_name, - trx->mysql_master_log_pos, - TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); - } - /* The following call commits the mini-transaction, making the whole transaction committed in the file-based world, at this log sequence number. The transaction becomes 'durable' when