diff --git a/data/data0data.c b/data/data0data.c index 549bc59581a..9331ce78d8e 100644 --- a/data/data0data.c +++ b/data/data0data.c @@ -383,6 +383,7 @@ dfield_print_also_hex( const byte* data; ulint len; ulint mtype; + ulint prtype; ulint i; ibool print_also_hex; @@ -396,6 +397,7 @@ dfield_print_also_hex( } mtype = dtype_get_mtype(dfield_get_type(dfield)); + prtype = dtype_get_prtype(dfield_get_type(dfield)); if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) { @@ -403,11 +405,14 @@ dfield_print_also_hex( for (i = 0; i < len; i++) { int c = *data++; + if (!isprint(c)) { print_also_hex = TRUE; - c = ' '; + + fprintf(stderr, "\\x%02x", (unsigned char) c); + } else { + putc(c, stderr); } - putc(c, stderr); } if (!print_also_hex) { @@ -422,13 +427,122 @@ dfield_print_also_hex( for (i = 0; i < len; i++) { fprintf(stderr, "%02lx", (ulint)*data); + data++; + } + } else if (mtype == DATA_BINARY) { + data = dfield_get_data(dfield); + fputs(" Hex: ",stderr); + + for (i = 0; i < len; i++) { + fprintf(stderr, "%02lx", (ulint)*data); data++; } } else if (mtype == DATA_INT) { - ut_a(len == 4); /* only works for 32-bit integers */ - fprintf(stderr, "%d", (int)mach_read_from_4(data)); + dulint big_val; + + if (len == 1) { + ulint val; + + val = (ulint)mach_read_from_1(data); + + if (!(prtype & DATA_UNSIGNED)) { + val &= ~0x80; + fprintf(stderr, "%ld", (long) val); + } else { + fprintf(stderr, "%lu", (ulong) val); + } + + } else if (len == 2) { + ulint val; + + val = (ulint)mach_read_from_2(data); + + if (!(prtype & DATA_UNSIGNED)) { + val &= ~0x8000; + fprintf(stderr, "%ld", (long) val); + } else { + fprintf(stderr, "%lu", (ulong) val); + } + + } else if (len == 3) { + ulint val; + + val = (ulint)mach_read_from_3(data); + + if (!(prtype & DATA_UNSIGNED)) { + val &= ~0x800000; + fprintf(stderr, "%ld", (long) val); + } else { + fprintf(stderr, "%lu", (ulong) val); + } + + } else if (len == 4) { + ulint val; + + val = (ulint)mach_read_from_4(data); + + if (!(prtype & DATA_UNSIGNED)) { + val &= ~0x80000000; + fprintf(stderr, "%ld", (long) val); + } else { + fprintf(stderr, "%lu", (ulong) val); + } + + } else if (len == 6) { + big_val = (dulint)mach_read_from_6(data); + fprintf(stderr, "{%lu %lu}", + ut_dulint_get_high(big_val), + ut_dulint_get_low(big_val)); + } else if (len == 7) { + big_val = (dulint)mach_read_from_7(data); + fprintf(stderr, "{%lu %lu}", + ut_dulint_get_high(big_val), + ut_dulint_get_low(big_val)); + } else if (len == 8) { + big_val = (dulint)mach_read_from_8(data); + fprintf(stderr, "{%lu %lu}", + ut_dulint_get_high(big_val), + ut_dulint_get_low(big_val)); + } else { + fputs(" Hex: ",stderr); + + for (i = 0; i < len; i++) { + fprintf(stderr, "%02lx", (ulint)*data); + data++; + } + } + } else if (mtype == DATA_SYS) { + dulint id; + + if (prtype & DATA_TRX_ID) { + id = mach_read_from_6(data); + + fprintf(stderr, "trx_id {%lu %lu}", + ut_dulint_get_high(id), ut_dulint_get_low(id)); + } else if (prtype & DATA_ROLL_PTR) { + id = mach_read_from_7(data); + + fprintf(stderr, "roll_ptr {%lu %lu}", + ut_dulint_get_high(id), ut_dulint_get_low(id)); + } else if (prtype & DATA_ROW_ID) { + id = mach_read_from_6(data); + + fprintf(stderr, "row_id {%lu %lu}", + ut_dulint_get_high(id), ut_dulint_get_low(id)); + } else { + id = mach_dulint_read_compressed(data); + + fprintf(stderr, "mix_id {%lu %lu}", + ut_dulint_get_high(id), ut_dulint_get_low(id)); + } + } else { - ut_error; + fputs(" Hex: ",stderr); + + for (i = 0; i < len; i++) { + fprintf(stderr, "%02lx", (ulint)*data); + data++; + } } } diff --git 
a/dict/dict0crea.c b/dict/dict0crea.c index 766448bffa9..7dd4010e414 100644 --- a/dict/dict0crea.c +++ b/dict/dict0crea.c @@ -540,7 +540,11 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + /* For fast index creation we have already allocated an index id + for this index so that we could write an UNDO log record for it.*/ + if (ut_dulint_is_zero(index->id)) { + index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + } /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -552,6 +556,9 @@ dict_build_index_def_step( ins_node_set_new_row(node->ind_def, row); + /* Note that the index was created by this transaction. */ + index->trx_id = trx->id; + return(DB_SUCCESS); } diff --git a/dict/dict0dict.c b/dict/dict0dict.c index 5caa060ed69..018bc339354 100644 --- a/dict/dict0dict.c +++ b/dict/dict0dict.c @@ -26,6 +26,7 @@ Created 1/8/1996 Heikki Tuuri #include "pars0sym.h" #include "que0que.h" #include "rem0cmp.h" +#include "row0merge.h" #ifndef UNIV_HOTBACKUP # include "m_ctype.h" /* my_isspace() */ #endif /* !UNIV_HOTBACKUP */ @@ -366,19 +367,6 @@ dict_table_get_next_index_noninline( return(dict_table_get_next_index(index)); } -/************************************************************************** -Returns an index object. */ - -dict_index_t* -dict_table_get_index_noninline( -/*===========================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name) /* in: index name */ -{ - return(dict_table_get_index(table, name)); -} - /************************************************************************** Returns a column's name. */ @@ -539,6 +527,33 @@ dict_table_autoinc_update( mutex_exit(&(table->autoinc_mutex)); } +/************************************************************************** +Looks for an index with the given table and index id. +NOTE that we do not reserve the dictionary mutex. */ +dict_index_t* +dict_index_get_on_id_low( +/*=====================*/ + /* out: index or NULL if not found from cache */ + dict_table_t* table, /* in: table */ + dulint id) /* in: index id */ +{ + dict_index_t* index; + + index = dict_table_get_first_index(table); + + while (index) { + if (0 == ut_dulint_cmp(id, index->id)) { + /* Found */ + + return(index); + } + + index = dict_table_get_next_index(index); + } + + return(NULL); +} + /************************************************************************ Looks for column n in an index. 
 */
@@ -806,7 +821,7 @@ dict_table_get(
				does not exist */
	const char*	table_name,	/* in: table name */
	ibool		inc_mysql_count)
-				/* in: whether to increment the open
+				/* in: whether to increment the open
				handle count on the table */
 {
	dict_table_t*	table;
@@ -1358,20 +1373,6 @@ dict_index_add_to_cache(
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(mem_heap_validate(index->heap));
-
-#ifdef UNIV_DEBUG
-	{
-		dict_index_t*	index2;
-		index2 = UT_LIST_GET_FIRST(table->indexes);
-
-		while (index2 != NULL) {
-			ut_ad(ut_strcmp(index->name, index2->name) != 0);
-
-			index2 = UT_LIST_GET_NEXT(indexes, index2);
-		}
-	}
-#endif /* UNIV_DEBUG */
-
	ut_a(!dict_index_is_clust(index)
	     || UT_LIST_GET_LEN(table->indexes) == 0);
@@ -1412,7 +1413,10 @@ dict_index_add_to_cache(
		dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
	}

-	new_index->page = (unsigned int) page_no;
+	new_index->stat_index_size = 1;
+	new_index->stat_n_leaf_pages = 1;
+
+	new_index->page = page_no;
	rw_lock_create(&new_index->lock, SYNC_INDEX_TREE);

	if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) {
@@ -1497,7 +1501,7 @@ dict_index_find_cols(
		/* It is an error not to find a matching column. */
		ut_error;

-	found:
+found:
		;
	}
 }
@@ -1873,18 +1877,92 @@ dict_index_build_internal_non_clust(
 Checks if a table is referenced by foreign keys. */

 ibool
-dict_table_referenced_by_foreign_key(
-/*=================================*/
-			/* out: TRUE if table is referenced by a
-			foreign key */
-	dict_table_t*	table)	/* in: InnoDB table */
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+				/* out: TRUE if table is referenced
+				by a foreign key */
+	const dict_table_t*	table)	/* in: InnoDB table */
 {
-	if (UT_LIST_GET_LEN(table->referenced_list) > 0) {
+	return(UT_LIST_GET_LEN(table->referenced_list) > 0);
+}

-		return(TRUE);
+/*************************************************************************
+Check if the index is referenced by a foreign key; if TRUE return the
+foreign key struct, else return NULL. */
+
+dict_foreign_t*
+dict_table_get_referenced_constraint(
+/*=================================*/
+			/* out: pointer to foreign key struct if index
+			is defined for foreign key, otherwise NULL */
+	dict_table_t*	table,	/* in: InnoDB table */
+	dict_index_t*	index)	/* in: InnoDB index */
+{
+	dict_foreign_t*	foreign = NULL;
+
+	ut_ad(index && table);
+
+	/* If the referenced list is empty, nothing to do */
+
+	if (UT_LIST_GET_LEN(table->referenced_list) == 0) {
+
+		return(NULL);
	}

-	return(FALSE);
+	foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+	while (foreign) {
+		if (foreign->referenced_index == index) {
+
+			return(foreign);
+		}
+
+		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+	}
+
+	return(NULL);
+}
+
+/*************************************************************************
+Checks if an index is defined for a foreign key constraint. Index is a part
+of a foreign key constraint if the index is referenced by a foreign key
+or the index is a foreign key index. */
+
+dict_foreign_t*
+dict_table_get_foreign_constraint(
+/*==============================*/
+			/* out: pointer to foreign key struct if index
+			is defined for foreign key, otherwise NULL */
+	dict_table_t*	table,	/* in: InnoDB table */
+	dict_index_t*	index)	/* in: InnoDB index */
+{
+	dict_foreign_t*	foreign = NULL;
+
+	ut_ad(index && table);
+
+	/* If the list is empty then nothing to do */
+
+	if (UT_LIST_GET_LEN(table->foreign_list) == 0) {
+
+		return(NULL);
+	}
+
+	/* Check whether this index is defined for a foreign key */
+
+	foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+	while (foreign) {
+		if (foreign->foreign_index == index
+		    || foreign->referenced_index == index) {
+
+			return(foreign);
+		}
+
+		foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+	}
+
+	return(NULL);
 }

 /*************************************************************************
@@ -1967,7 +2045,8 @@ dict_foreign_find(

 /*************************************************************************
 Tries to find an index whose first fields are the columns in the array,
-in the same order. */
+in the same order and is not marked for deletion and is not the same
+as types_idx. */
 static
 dict_index_t*
 dict_foreign_find_index(
@@ -1986,16 +2065,23 @@ dict_foreign_find_index(
				be declared NOT NULL */
 {
	dict_index_t*	index;
-	dict_field_t*	field;
-	const char*	col_name;
-	ulint		i;

	index = dict_table_get_first_index(table);

	while (index != NULL) {
-		if (dict_index_get_n_fields(index) >= n_cols) {
+		/* Ignore matches that refer to the same instance
+		or the index is to be dropped */
+		if (index->to_be_dropped || types_idx == index) {
+
+			goto next_rec;
+
+		} else if (dict_index_get_n_fields(index) >= n_cols) {
+			ulint	i;

			for (i = 0; i < n_cols; i++) {
+				dict_field_t*	field;
+				const char*	col_name;
+
				field = dict_index_get_nth_field(index, i);

				col_name = dict_table_get_col_name(
@@ -2036,12 +2122,73 @@ dict_foreign_find_index(
			}
		}

+next_rec:
		index = dict_table_get_next_index(index);
	}

	return(NULL);
 }

+/*************************************************************************
+Tries to find an index whose fields match exactly, in the same order. If
+more than one index is found then return the index with the higher id.*/
+static
+dict_index_t*
+dict_find_index_by_max_id(
+/*======================*/
+				/* out: matching index, NULL if not found */
+	dict_table_t*	table,	/* in: table */
+	const char*	name,	/* in: the index name to find */
+	const char**	columns,/* in: array of column names */
+	ulint		n_cols)	/* in: number of columns */
+{
+	dict_index_t*	index;
+	dict_index_t*	found;
+
+	found = NULL;
+	index = dict_table_get_first_index(table);
+
+	while (index != NULL) {
+		if (ut_strcmp(index->name, name) == 0
+		    && dict_index_get_n_ordering_defined_by_user(index)
+		    == n_cols) {
+
+			ulint	i;
+
+			for (i = 0; i < n_cols; i++) {
+				dict_field_t*	field;
+				const char*	col_name;
+
+				field = dict_index_get_nth_field(index, i);
+
+				col_name = dict_table_get_col_name(
+					table, dict_col_get_no(field->col));
+
+				if (0 != innobase_strcasecmp(
+					    columns[i], col_name)) {
+
+					break;
+				}
+			}
+
+			if (i == n_cols) {
+				/* We found a matching index, select
+				the index with the higher id*/
+
+				if (!found
+				    || ut_dulint_cmp(index->id, found->id) > 0) {
+
+					found = index;
+				}
+			}
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	return(found);
+}
+
 /**************************************************************************
 Report an error in a foreign key definition.
*/ static @@ -4013,6 +4160,7 @@ dict_index_print_low( { ib_longlong n_vals; ulint i; + const char* type_string; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -4023,6 +4171,14 @@ dict_index_print_low( n_vals = index->stat_n_diff_key_vals[1]; } + if (index->type & DICT_CLUSTERED) { + type_string = "clustered index"; + } else if (index->type & DICT_UNIQUE) { + type_string = "unique index"; + } else { + type_string = "secondary index"; + } + fprintf(stderr, " INDEX: name %s, id %lu %lu, fields %lu/%lu," " uniq %lu, type %lu\n" @@ -4261,6 +4417,7 @@ dict_print_info_on_foreign_keys( /************************************************************************ Displays the names of the index and the table. */ + void dict_index_name_print( /*==================*/ @@ -4273,3 +4430,417 @@ dict_index_name_print( fputs(" of table ", file); ut_print_name(file, trx, TRUE, index->table_name); } + +/************************************************************************** +Get index by name */ + +dict_index_t* +dict_table_get_index_on_name( +/*=========================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name) /* in: name of the index to find */ +{ + dict_index_t* index; + + index = dict_table_get_first_index(table); + + while (index != NULL) { + if (ut_strcmp(index->name, name) == 0) { + + return(index); + } + + index = dict_table_get_next_index(index); + } + + return(NULL); + +} + +/************************************************************************** +Find and index that is equivalent to the one passed in. */ + +dict_index_t* +dict_table_find_equivalent_index( +/*=============================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index) /* in: index to match */ +{ + ulint i; + const char** column_names; + dict_index_t* equiv_index; + + if (UT_LIST_GET_LEN(table->foreign_list) == 0) { + + return(NULL); + } + + column_names = mem_alloc(index->n_fields * sizeof *column_names); + + /* Convert the column names to the format & type accepted by the find + index function */ + for (i = 0; i < index->n_fields; i++) { + column_names[i] = index->fields[i].name; + } + + equiv_index = dict_foreign_find_index( + table, (const char**)column_names, index->n_fields, + index, TRUE, FALSE); + + mem_free(column_names); + + return(equiv_index); +} + +/************************************************************************** +Replace the index passed in with another equivalent index in the tables +foreign key list. */ + +void +dict_table_replace_index_in_foreign_list( +/*=====================================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index) /* in: index to be replaced */ +{ + dict_index_t* new_index; + + new_index = dict_table_find_equivalent_index(table, index); + + /* If match found */ + if (new_index) { + dict_foreign_t* foreign; + + ut_a(new_index != index); + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + /* If the list is not empty then this should hold */ + ut_a(foreign); + + /* Iterate over the foreign index list and replace the index + passed in with the new index */ + while (foreign) { + + if (foreign->foreign_index == index) { + foreign->foreign_index = new_index; + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + } +} + +/************************************************************************** +In case there is more than one index with the same name return the index +with the min(id). 
*/ + +dict_index_t* +dict_table_get_index_on_name_and_min_id( +/*=====================================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name) /* in: name of the index to find */ +{ + dict_index_t* index; + dict_index_t* min_index; /* Index with matching name and min(id) */ + + min_index = NULL; + index = dict_table_get_first_index(table); + + while (index != NULL) { + if (ut_strcmp(index->name, name) == 0) { + if (!min_index + || ut_dulint_cmp(index->id, min_index->id) < 0) { + + min_index = index; + } + } + + index = dict_table_get_next_index(index); + } + + return(min_index); + +} + +/************************************************************************** +Returns an index object by matching on the name and column names and +if more than one index matches return the index with the max id */ + +dict_index_t* +dict_table_get_index_by_max_id( +/*===========================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name, /* in: index name to find*/ + const char** column_names, /* in: column names to match */ + ulint n_cols) /* in: number of columns */ +{ + /* Find an exact match with the passed in index */ + return(dict_find_index_by_max_id(table, name, column_names, n_cols)); +} + +/************************************************************************** +Check for duplicate index entries in a table [using the index name] */ +#ifdef UNIV_DEBUG + +void +dict_table_check_for_dup_indexes( +/*=============================*/ + dict_table_t* table) /* in: Check for dup indexes in this table */ +{ + /* Check for duplicates, ignoring indexes that are marked + as to be dropped */ + + dict_index_t* index1; + dict_index_t* index2; + + /* The primary index _must_ exist */ + ut_a(UT_LIST_GET_LEN(table->indexes) > 0); + + index1 = UT_LIST_GET_FIRST(table->indexes); + index2 = UT_LIST_GET_NEXT(indexes, index1); + + while (index1 && index2) { + + while (index2) { + + if (!index2->to_be_dropped) { + ut_ad(ut_strcmp(index1->name, index2->name)); + } + + index2 = UT_LIST_GET_NEXT(indexes, index2); + } + + index1 = UT_LIST_GET_NEXT(indexes, index1); + index2 = UT_LIST_GET_NEXT(indexes, index1); + } +} +#endif /* UNIV_DEBUG */ + +/************************************************************************** +Create an undo list for the trx.*/ + +void +dict_undo_create_list( +/*==================*/ + trx_t* trx) /* out: dict_undo_t list */ +{ + ut_a(!trx->dict_undo_list); + + trx->dict_undo_list = mem_alloc(sizeof(*trx->dict_undo_list)); + + UT_LIST_INIT(*trx->dict_undo_list); +} + +/************************************************************************** +Create an dict_undo_t element and append to the undo list of the trx.*/ + +dict_undo_t* +dict_undo_create_element( +/*=====================*/ /* out: dict_undo_t element*/ + trx_t* trx) /* in: create & add elem to this trx */ +{ + dict_undo_t* dict_undo; + + ut_a(trx->dict_undo_list); + + dict_undo = mem_alloc(sizeof(*dict_undo)); + memset(dict_undo, '\0', sizeof(*dict_undo)); + + UT_LIST_ADD_LAST(node, *trx->dict_undo_list, dict_undo); + + return(dict_undo); +} + +/************************************************************************** +Free all the nodes on the undo list and free list.*/ + +void +dict_undo_free_list( +/*================*/ + trx_t* trx) +{ + dict_undo_t* dict_undo; + + ut_a(trx->dict_undo_list); + + dict_undo = UT_LIST_GET_FIRST(*trx->dict_undo_list); + + while (dict_undo) { + + UT_LIST_REMOVE(node, 
			*trx->dict_undo_list, dict_undo);
+
+		mem_free(dict_undo);
+
+		dict_undo = UT_LIST_GET_FIRST(*trx->dict_undo_list);
+	}
+
+	mem_free(trx->dict_undo_list);
+
+	trx->dict_undo_list = NULL;
+}
+
+/**************************************************************************
+Create a redo list for the trx.*/
+
+void
+dict_redo_create_list(
+/*==================*/
+	trx_t*	trx)	/* out: dict_redo_t list */
+{
+	ut_a(!trx->dict_redo_list);
+
+	trx->dict_redo_list = mem_alloc(sizeof(*trx->dict_redo_list));
+
+	UT_LIST_INIT(*trx->dict_redo_list);
+}
+
+/**************************************************************************
+Create a dict_redo_t element and append it to the redo list of the trx.*/
+
+dict_redo_t*
+dict_redo_create_element(
+/*=====================*/	/* out: dict_redo_t element*/
+	trx_t*	trx)	/* in: create & add elem to this trx */
+{
+	dict_redo_t*	dict_redo;
+
+	ut_a(trx->dict_redo_list);
+
+	dict_redo = mem_alloc(sizeof(*dict_redo));
+	memset(dict_redo, '\0', sizeof(*dict_redo));
+
+	UT_LIST_ADD_LAST(node, *trx->dict_redo_list, dict_redo);
+
+	return(dict_redo);
+}
+
+/**************************************************************************
+Free all the nodes on the redo list and free the list itself.*/
+
+void
+dict_redo_free_list(
+/*================*/
+	trx_t*	trx)
+{
+	dict_redo_t*	dict_redo;
+
+	ut_a(trx->dict_redo_list);
+
+	dict_redo = UT_LIST_GET_FIRST(*trx->dict_redo_list);
+
+	while (dict_redo) {
+
+		UT_LIST_REMOVE(node, *trx->dict_redo_list, dict_redo);
+
+		mem_free(dict_redo);
+
+		dict_redo = UT_LIST_GET_FIRST(*trx->dict_redo_list);
+	}
+
+	mem_free(trx->dict_redo_list);
+
+	trx->dict_redo_list = NULL;
+}
+
+/**************************************************************************
+Get the index by name from the transaction's REDO list.*/
+
+dict_index_t*
+dict_redo_get_index_on_name(
+/*========================*/
+	trx_t*		trx,	/* in: transaction */
+	dict_table_t*	table,	/* in: table the index belongs to */
+	const char*	name)	/* in: index name */
+{
+	dict_redo_t*	dict_redo;
+
+	ut_a(trx->dict_redo_list);
+
+	dict_redo = UT_LIST_GET_FIRST(*trx->dict_redo_list);
+
+	while (dict_redo) {
+
+		if (dict_redo->index->table == table
+		    && ut_strcmp(dict_redo->index->name, name) == 0) {
+
+			return(dict_redo->index);
+		}
+
+		dict_redo = UT_LIST_GET_NEXT(node, dict_redo);
+	}
+
+	return(NULL);
+}
+
+/**************************************************************************
+Remove the index from the transaction's REDO list.*/
+
+void
+dict_redo_remove_index(
+/*===================*/
+	trx_t*		trx,	/* in: transaction */
+	dict_index_t*	index)	/* in: index to remove */
+{
+	dict_redo_t*	dict_redo;
+
+	ut_a(trx->dict_redo_list);
+
+	dict_redo = UT_LIST_GET_FIRST(*trx->dict_redo_list);
+
+	while (dict_redo) {
+
+		if (dict_redo->index == index) {
+			UT_LIST_REMOVE(node, *trx->dict_redo_list, dict_redo);
+
+			break;
+		}
+
+		dict_redo = UT_LIST_GET_NEXT(node, dict_redo);
+	}
+
+}
+
+/**************************************************************************
+Rename the indexes on the transaction's REDO list in SYS_INDEXES.*/
+
+ulint
+dict_rename_indexes(
+/*================*/
+	trx_t*	trx,		/* in: transaction */
+	ibool	commit_flag)	/* in: must be TRUE (otherwise unused) */
+{
+	dict_redo_t*	dict_redo;
+	ulint		err = DB_SUCCESS;
+
+	ut_a(trx->dict_redo_list);
+	ut_a(commit_flag);
+
+	dict_redo = UT_LIST_GET_FIRST(*trx->dict_redo_list);
+
+	while (dict_redo && err == DB_SUCCESS) {
+		dict_index_t*	index;
+
+		index = dict_redo->index;
+
+		ut_a(index->table);
+		ut_a(!ut_dulint_is_zero(index->id));
+		ut_a(index->space == index->table->space);
+#ifdef
UNIV_DEBUG + fprintf(stderr, "Renaming index: %s\n", index->name); +#endif /* UNIV_DEBUG */ + + err = row_merge_rename_index(trx, index->table, index); + + dict_redo = UT_LIST_GET_NEXT(node, dict_redo); + } + + /* We free the list anyway - even if there is an error of some sort, + let the UNDO code handle the errors.*/ + dict_redo_free_list(trx); + + return(err); +} + diff --git a/dict/dict0load.c b/dict/dict0load.c index 1da3f33202d..4ec804a5cbf 100644 --- a/dict/dict0load.c +++ b/dict/dict0load.c @@ -476,32 +476,12 @@ dict_load_columns( mtr_commit(&mtr); } -/************************************************************************ -Report that an index field or index for a table has been delete marked. */ -static -void -dict_load_report_deleted_index( -/*===========================*/ - const char* name, /* in: table name */ - ulint field) /* in: index field, or ULINT_UNDEFINED */ -{ - fprintf(stderr, "InnoDB: Error: data dictionary entry" - " for table %s is corrupt!\n", name); - if (field != ULINT_UNDEFINED) { - fprintf(stderr, - "InnoDB: Index field %lu is delete marked.\n", field); - } else { - fputs("InnoDB: An index is delete marked.\n", stderr); - } -} - /************************************************************************ Loads definitions for index fields. */ static void dict_load_fields( /*=============*/ - dict_table_t* table, /* in: table */ dict_index_t* index, /* in: index whose fields to load */ mem_heap_t* heap) /* in: memory heap for temporary storage */ { @@ -543,13 +523,18 @@ dict_load_fields( rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); + + /* There could be delete marked records in SYS_FIELDS + because SYS_FIELDS.INDEX_ID can be updated + by ALTER TABLE ADD INDEX. */ + if (rec_get_deleted_flag(rec, 0)) { - dict_load_report_deleted_index(table->name, i); + + goto next_rec; } field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); - ut_a(ut_memcmp(buf, field, len) == 0); field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); @@ -584,6 +569,7 @@ dict_load_fields( (char*) field, len), prefix_len); +next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } @@ -662,16 +648,9 @@ dict_load_indexes( if (ut_memcmp(buf, field, len) != 0) { break; - } - - if (rec_get_deleted_flag(rec, 0)) { - dict_load_report_deleted_index(table->name, - ULINT_UNDEFINED); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(FALSE); + } else if (rec_get_deleted_flag(rec, 0)) { + /* Skip delete marked records */ + goto next_rec; } field = rec_get_nth_field_old(rec, 1, &len); @@ -714,12 +693,15 @@ dict_load_indexes( if ((type & DICT_CLUSTERED) == 0 && NULL == dict_table_get_first_index(table)) { - fprintf(stderr, - "InnoDB: Error: trying to load index %s" - " for table %s\n" - "InnoDB: but the first index" - " is not clustered!\n", - name_buf, table->name); + if (*table->name != TEMP_TABLE_PREFIX) { + + fprintf(stderr, + "InnoDB: Error: trying to" + " load index %s for table %s\n" + "InnoDB: but the first index" + " is not clustered!\n", + name_buf, table->name); + } btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -741,10 +723,11 @@ dict_load_indexes( space, type, n_fields); index->id = id; - dict_load_fields(table, index, heap); + dict_load_fields(index, heap); dict_index_add_to_cache(table, index, page_no); } +next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } diff --git a/dict/dict0mem.c b/dict/dict0mem.c index b499fba5af8..204e5cc6398 100644 --- a/dict/dict0mem.c +++ b/dict/dict0mem.c @@ -62,6 +62,7 @@ dict_mem_table_create( 
table->n_foreign_key_checks_running = 0; table->cached = FALSE; + table->to_be_dropped = 0; table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS) * sizeof(dict_col_t)); @@ -75,6 +76,7 @@ dict_mem_table_create( UT_LIST_INIT(table->locks); UT_LIST_INIT(table->foreign_list); UT_LIST_INIT(table->referenced_list); + UT_LIST_INIT(table->prebuilts); #ifdef UNIV_DEBUG table->does_not_fit_in_memory = FALSE; @@ -236,6 +238,7 @@ dict_mem_index_create( heap = mem_heap_create(DICT_HEAP_SIZE); index = mem_heap_alloc(heap, sizeof(dict_index_t)); + index->id = ut_dulint_create(0, 0); index->heap = heap; index->type = type; @@ -253,6 +256,8 @@ dict_mem_index_create( index->stat_n_diff_key_vals = NULL; index->cached = FALSE; + index->to_be_dropped = FALSE; + index->trx_id = ut_dulint_create(0, 0); memset(&index->lock, 0, sizeof index->lock); #ifdef UNIV_DEBUG index->magic_n = DICT_INDEX_MAGIC_N; diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc index 2557f9f005c..299eafaf653 100644 --- a/handler/ha_innodb.cc +++ b/handler/ha_innodb.cc @@ -1,5 +1,4 @@ /* Copyright (C) 2000-2005 MySQL AB & Innobase Oy - This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. @@ -89,7 +88,9 @@ extern "C" { #include "../storage/innobase/include/sync0sync.h" #include "../storage/innobase/include/fil0fil.h" #include "../storage/innobase/include/trx0xa.h" +#include "../storage/innobase/include/row0merge.h" #include "../storage/innobase/include/thr0loc.h" +#include "../storage/innobase/include/dict0boot.h" #include "../storage/innobase/include/ha_prototypes.h" } @@ -164,6 +165,14 @@ static handler *innobase_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root); +/******************************************************************** +Return alter table flags supported in an InnoDB database. 
*/ +static +uint +innobase_alter_table_flags( +/*=======================*/ + uint flags); + static const char innobase_hton_name[]= "InnoDB"; static handler *innobase_create_handler(handlerton *hton, @@ -491,7 +500,7 @@ innobase_release_temporary_latches( if (!innodb_inited) { - return 0; + return(0); } trx = thd_to_trx(thd, hton); @@ -499,7 +508,7 @@ innobase_release_temporary_latches( if (trx) { innobase_release_stat_resources(trx); } - return 0; + return(0); } /************************************************************************ @@ -618,18 +627,21 @@ convert_error_code_to_mysql( return(HA_ERR_NO_SAVEPOINT); } else if (error == (int) DB_LOCK_TABLE_FULL) { - /* Since we rolled back the whole transaction, we must - tell it also to MySQL so that MySQL knows to empty the - cached binlog for this transaction */ + /* Since we rolled back the whole transaction, we must + tell it also to MySQL so that MySQL knows to empty the + cached binlog for this transaction */ - if (thd) { - ha_rollback(thd); - } + if (thd) { + ha_rollback(thd); + } - return(HA_ERR_LOCK_TABLE_FULL); - } else { - return(-1); // Unknown error - } + return(HA_ERR_LOCK_TABLE_FULL); + } else if (error == (int) DB_CANNOT_DROP_FOREIGN_INDEX) { + + return(HA_ERR_DROP_INDEX_FK); + } else { + return(-1); // Unknown error + } } /***************************************************************** @@ -1457,6 +1469,7 @@ innobase_init(void *p) innobase_hton->show_status=innobase_show_status; innobase_hton->flags=HTON_NO_FLAGS; innobase_hton->release_temporary_latches=innobase_release_temporary_latches; + innobase_hton->alter_table_flags = innobase_alter_table_flags; if (have_innodb != SHOW_OPTION_YES) DBUG_RETURN(0); // nothing else to do @@ -1741,6 +1754,21 @@ innobase_flush_logs(handlerton *hton) DBUG_RETURN(result); } +/******************************************************************** +Return alter table flags supported in an InnoDB database. */ +static +uint +innobase_alter_table_flags( +/*=======================*/ + uint flags) +{ + return(HA_ONLINE_ADD_INDEX_NO_WRITES + | HA_ONLINE_DROP_INDEX_NO_WRITES + | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES + | HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES + | HA_ONLINE_ADD_PK_INDEX_NO_WRITES); +} + /********************************************************************* Commits a transaction in an InnoDB database. */ static @@ -2307,7 +2335,7 @@ ha_innobase::bas_ext() const /*========================*/ /* out: file extension string */ { - return ha_innobase_exts; + return(ha_innobase_exts); } @@ -2690,7 +2718,7 @@ innobase_mysql_cmp( return(0); } default: - assert(0); + ut_error; } return(0); @@ -2794,7 +2822,7 @@ get_innobase_type_from_mysql_type( case MYSQL_TYPE_LONG_BLOB: return(DATA_BLOB); default: - assert(0); + ut_error; } return(0); @@ -3473,7 +3501,7 @@ no_commit: /* We must use the handler code to update the auto-increment value to be sure that we increment it correctly. */ - if ((error= update_auto_increment())) + if ((error= update_auto_increment())) goto func_exit; auto_inc_used = 1; @@ -3726,7 +3754,7 @@ ha_innobase::update_row( /* This is not a delete */ prebuilt->upd_node->is_delete = FALSE; - assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); + ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); innodb_srv_conc_enter_innodb(trx); @@ -3848,6 +3876,36 @@ ha_innobase::try_semi_consistent_read(bool yes) } } +/********************************************************************** +Check if an index can be used by the optimizer. 
*/ + +bool +ha_innobase::is_index_available( +/*============================*/ + /* out: true if available else false*/ + uint keynr) /* in: index number to check */ +{ + DBUG_ENTER("is_index_available"); + + if (table && keynr != MAX_KEY && table->s->keys > 0) { + dict_index_t* index; + KEY* key = table->key_info + keynr; + + ut_ad(user_thd == current_thd); + ut_a(prebuilt->trx == (trx_t*) current_thd->ha_data[ht->slot]); + + index = dict_table_get_index_on_name( + prebuilt->table, key->name); + + if (!row_merge_is_index_usable(prebuilt->trx, index)) { + + DBUG_RETURN(false); + } + } + + DBUG_RETURN(true); +} + /********************************************************************** Initializes a handle to use an index. */ @@ -3858,12 +3916,9 @@ ha_innobase::index_init( uint keynr, /* in: key (index) number */ bool sorted) /* in: 1 if result MUST be sorted according to index */ { - int error = 0; DBUG_ENTER("index_init"); - error = change_active_index(keynr); - - DBUG_RETURN(error); + DBUG_RETURN(change_active_index(keynr)); } /********************************************************************** @@ -3912,7 +3967,7 @@ convert_search_mode_to_innobase( innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to work correctly. */ - default: assert(0); + default: ut_error; } return(0); @@ -4008,20 +4063,21 @@ ha_innobase::index_read( necessarily prebuilt->index, but can also be the clustered index */ if (prebuilt->sql_stat_start) { - build_template(prebuilt, user_thd, table, - ROW_MYSQL_REC_FIELDS); + build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS); } if (key_ptr) { /* Convert the search key value to InnoDB format into prebuilt->search_tuple */ - row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple, - (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, - index, - (byte*) key_ptr, - (ulint) key_len, prebuilt->trx); + row_sel_convert_mysql_key_to_innobase( + prebuilt->search_tuple, + (byte*) key_val_buff, + (ulint)upd_and_key_val_buff_len, + index, + (byte*) key_ptr, + (ulint) key_len, + prebuilt->trx); } else { /* We position the cursor to the last or the first entry in the index */ @@ -4034,10 +4090,12 @@ ha_innobase::index_read( match_mode = 0; if (find_flag == HA_READ_KEY_EXACT) { + match_mode = ROW_SEL_EXACT; } else if (find_flag == HA_READ_PREFIX - || find_flag == HA_READ_PREFIX_LAST) { + || find_flag == HA_READ_PREFIX_LAST) { + match_mode = ROW_SEL_EXACT_PREFIX; } @@ -4050,18 +4108,24 @@ ha_innobase::index_read( innodb_srv_conc_exit_innodb(prebuilt->trx); if (ret == DB_SUCCESS) { + error = 0; table->status = 0; } else if (ret == DB_RECORD_NOT_FOUND) { + error = HA_ERR_KEY_NOT_FOUND; table->status = STATUS_NOT_FOUND; } else if (ret == DB_END_OF_INDEX) { + error = HA_ERR_KEY_NOT_FOUND; table->status = STATUS_NOT_FOUND; + } else { + error = convert_error_code_to_mysql((int) ret, user_thd); + table->status = STATUS_NOT_FOUND; } @@ -4097,8 +4161,9 @@ ha_innobase::change_active_index( index, even if it was internally generated by InnoDB */ { - KEY* key=0; + KEY* key = 0; ha_statistic_increment(&SSV::ha_read_key_count); + DBUG_ENTER("change_active_index"); ut_ad(user_thd == current_thd); @@ -4107,13 +4172,15 @@ ha_innobase::change_active_index( active_index = keynr; if (keynr != MAX_KEY && table->s->keys > 0) { + key = table->key_info + active_index; - prebuilt->index = dict_table_get_index_noninline( + prebuilt->index = dict_table_get_index_on_name( prebuilt->table, key->name); + } else { prebuilt->index = dict_table_get_first_index_noninline( - prebuilt->table); + 
prebuilt->table); } if (!prebuilt->index) { @@ -4126,7 +4193,7 @@ ha_innobase::change_active_index( DBUG_RETURN(1); } - assert(prebuilt->search_tuple != 0); + ut_a(prebuilt->search_tuple != 0); dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields); @@ -4194,8 +4261,9 @@ ha_innobase::general_fetch( innodb_srv_conc_enter_innodb(prebuilt->trx); - ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode, - direction); + ret = row_search_for_mysql( + (byte*)buf, 0, prebuilt, match_mode, direction); + innodb_srv_conc_exit_innodb(prebuilt->trx); if (ret == DB_SUCCESS) { @@ -4328,7 +4396,7 @@ int ha_innobase::rnd_init( /*==================*/ /* out: 0 or error number */ - bool scan) /* in: ???????? */ + bool scan) /* in: TRUE if table/index scan FALSE otherwise */ { int err; @@ -4382,9 +4450,11 @@ ha_innobase::rnd_next( if (start_of_scan) { error = index_first(buf); + if (error == HA_ERR_KEY_NOT_FOUND) { error = HA_ERR_END_OF_FILE; } + start_of_scan = 0; } else { error = general_fetch(buf, ROW_SEL_NEXT, 0); @@ -4743,6 +4813,7 @@ create_clustered_index_when_no_primary( index = dict_mem_index_create((char*) table_name, (char*) "GEN_CLUST_INDEX", 0, DICT_CLUSTERED, 0); + error = row_create_index_for_mysql(index, trx, NULL); error = convert_error_code_to_mysql(error, NULL); @@ -5076,7 +5147,7 @@ ha_innobase::delete_table( name_len = strlen(name); - assert(name_len < 1000); + ut_a(name_len < 1000); /* Strangely, MySQL passes the table name without the '.frm' extension, in contrast to ::create */ @@ -5186,7 +5257,90 @@ innobase_drop_database( return; #endif } +/************************************************************************* +Renames an InnoDB table. */ +static +int +innobase_rename_table( +/*==================*/ + /* out: 0 or error code */ + trx_t* trx, /* in: transaction */ + const char* from, /* in: old name of the table */ + const char* to, /* in: new name of the table */ + ibool commit_flag) /* in: if TRUE then commit */ +{ + int error; + char* norm_to; + char* norm_from; + if (lower_case_table_names) { + srv_lower_case_table_names = TRUE; + } else { + srv_lower_case_table_names = FALSE; + } + + // Magic number 64 arbitrary + norm_to = (char*) my_malloc(strlen(to) + 64, MYF(0)); + norm_from = (char*) my_malloc(strlen(from) + 64, MYF(0)); + + if (*to != TEMP_TABLE_PREFIX) { + normalize_table_name(norm_to, to); + } else { + strcpy(norm_to, to); + } + + if (*from != TEMP_TABLE_PREFIX) { + normalize_table_name(norm_from, from); + } else { + strcpy(norm_from, from); + } + + /* Serialize data dictionary operations with dictionary mutex: + no deadlocks can occur then in these operations */ + + row_mysql_lock_data_dictionary(trx); + + error = row_rename_table_for_mysql( + norm_from, norm_to, trx, commit_flag); + + if (error != DB_SUCCESS) { + FILE* ef = dict_foreign_err_file; + + fputs("InnoDB: Rename from old name ", ef); + + if (*norm_from != TEMP_TABLE_PREFIX) { + ut_print_name(ef, trx, TRUE, norm_from); + } else { + fputs(norm_from, ef); + } + + fputs(" to ", ef); + + if (*norm_to != TEMP_TABLE_PREFIX) { + ut_print_name(ef, trx, TRUE, norm_to); + } else { + fputs(norm_to, ef); + } + + fputs(" failed!\n", ef); + } + + row_mysql_unlock_data_dictionary(trx); + + /* Flush the log to reduce probability that the .frm files and + the InnoDB data dictionary get out-of-sync if the user runs + with innodb_flush_log_at_trx_commit = 0 */ + + log_buffer_flush_to_disk(); + + /* Tell the InnoDB server that there might be work for + utility threads: */ + + my_free(norm_to, MYF(0)); + 
my_free(norm_from, MYF(0)); + + return error; +} /************************************************************************* Renames an InnoDB table. */ @@ -5197,13 +5351,9 @@ ha_innobase::rename_table( const char* from, /* in: old name of the table */ const char* to) /* in: new name of the table */ { - ulint name_len1; - ulint name_len2; + trx_t* trx; int error; trx_t* parent_trx; - trx_t* trx; - char norm_from[1000]; - char norm_to[1000]; DBUG_ENTER("ha_innobase::rename_table"); @@ -5217,12 +5367,6 @@ ha_innobase::rename_table( trx_search_latch_release_if_reserved(parent_trx); - if (lower_case_table_names) { - srv_lower_case_table_names = TRUE; - } else { - srv_lower_case_table_names = FALSE; - } - trx = trx_allocate_for_mysql(); trx->mysql_thd = current_thd; trx->mysql_query_str = &((*current_thd).query); @@ -5231,24 +5375,7 @@ ha_innobase::rename_table( trx->check_foreigns = FALSE; } - name_len1 = strlen(from); - name_len2 = strlen(to); - - assert(name_len1 < 1000); - assert(name_len2 < 1000); - - normalize_table_name(norm_from, from); - normalize_table_name(norm_to, to); - - /* Rename the table in InnoDB */ - - error = row_rename_table_for_mysql(norm_from, norm_to, trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); + error = innobase_rename_table(trx, from, to, TRUE); /* Tell the InnoDB server that there might be work for utility threads: */ @@ -5308,7 +5435,10 @@ ha_innobase::records_in_range( key = table->key_info + active_index; - index = dict_table_get_index_noninline(prebuilt->table, key->name); + index = dict_table_get_index_on_name(prebuilt->table, key->name); + + /* MySQL knows about this index and so we must be able to find it.*/ + ut_a(index); range_start = dtuple_create_for_mysql(&heap1, key->key_parts); dict_index_copy_types(range_start, index, key->key_parts); @@ -5391,8 +5521,11 @@ ha_innobase::estimate_rows_upper_bound(void) index = dict_table_get_first_index_noninline(prebuilt->table); - local_data_file_length = ((ulonglong) index->stat_n_leaf_pages) - * UNIV_PAGE_SIZE; + ut_a(index->stat_n_leaf_pages > 0); + + local_data_file_length = + ((ulonglong) index->stat_n_leaf_pages) * UNIV_PAGE_SIZE; + /* Calculate a minimum length for a clustered index record and from that an upper bound for the number of rows. 
Since we only calculate @@ -5642,10 +5775,18 @@ ha_innobase::info( } if (flag & HA_STATUS_ERRKEY) { + dict_index_t* err_index; + ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N); - errkey = (unsigned int) row_get_mysql_key_number_for_index( - (dict_index_t*) trx_get_error_info(prebuilt->trx)); + err_index = (dict_index_t*)trx_get_error_info(prebuilt->trx); + + if (err_index) { + errkey = (unsigned int) + row_get_mysql_key_number_for_index(err_index); + } else { + errkey = (unsigned int) prebuilt->trx->error_key_num; + } } if (flag & HA_STATUS_AUTO && table->found_next_number_field) { @@ -5677,7 +5818,7 @@ ha_innobase::info( prebuilt->trx->op_info = (char*)""; - DBUG_RETURN(0); + DBUG_RETURN(0); } /************************************************************************** @@ -5948,7 +6089,7 @@ ha_innobase::get_foreign_key_list(THD *thd, List *f_key_list) { length=7; tmp_buff= "CASCADE"; - } + } else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) { length=8; @@ -5966,8 +6107,8 @@ ha_innobase::get_foreign_key_list(THD *thd, List *f_key_list) } f_key_info.delete_method= make_lex_string(thd, f_key_info.delete_method, tmp_buff, length, 1); - - + + if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { length=7; @@ -6050,7 +6191,7 @@ ha_innobase::referenced_by_foreign_key(void) /*========================================*/ /* out: > 0 if referenced by a FOREIGN KEY */ { - if (dict_table_referenced_by_foreign_key(prebuilt->table)) { + if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) { return(1); } @@ -6472,7 +6613,7 @@ innodb_export_status() srv_export_innodb_status(); } - return 0; + return(0); } /**************************************************************************** @@ -6679,7 +6820,7 @@ bool innobase_show_status(handlerton *hton, THD* thd, case HA_ENGINE_MUTEX: return innodb_mutex_show_status(hton, thd, stat_print); default: - return FALSE; + return(FALSE); } } @@ -6694,7 +6835,7 @@ static mysql_byte* innobase_get_key(INNOBASE_SHARE* share, uint* length, { *length=share->table_name_length; - return (mysql_byte*) share->table_name; + return((mysql_byte*) share->table_name); } static INNOBASE_SHARE* get_share(const char* table_name) @@ -6719,7 +6860,7 @@ static INNOBASE_SHARE* get_share(const char* table_name) pthread_mutex_unlock(&innobase_share_mutex); my_free((gptr) share,0); - return 0; + return(0); } thr_lock_init(&share->lock); @@ -6729,7 +6870,7 @@ static INNOBASE_SHARE* get_share(const char* table_name) share->use_count++; pthread_mutex_unlock(&innobase_share_mutex); - return share; + return(share); } static void free_share(INNOBASE_SHARE* share) @@ -7178,7 +7319,7 @@ ha_innobase::get_error_message(int error, String *buf) buf->copy(trx->detailed_error, strlen(trx->detailed_error), system_charset_info); - return FALSE; + return(FALSE); } /*********************************************************************** @@ -7384,6 +7525,7 @@ is INSERT ON DUPLICATE KEY UPDATE. NOTE that storage/innobase/row/row0ins.c must contain the prototype for this function ! */ extern "C" +extern "C" ibool innobase_query_is_update(void) /*==========================*/ @@ -7504,7 +7646,7 @@ innobase_xa_prepare( srv_active_wake_master_thread(); - return error; + return(error); } /*********************************************************************** @@ -7551,6 +7693,976 @@ innobase_commit_by_xid( } } +/*********************************************************************** +This function checks that index keys are sensible. 
*/ +static +int +innobase_check_index_keys( +/*======================*/ + /* out: 0 or error number */ + TABLE* table, /* in: MySQL table */ + dict_table_t* innodb_table, /* in: InnoDB table */ + trx_t* trx, /* in: transaction */ + KEY* key_info, /* in: Indexes to be created */ + ulint num_of_keys) /* in: Number of indexes to + be created */ +{ + Field* field; + ulint key_num; + int error = DB_SUCCESS; + ibool is_unsigned; + + ut_ad(table && innodb_table && trx && key_info && num_of_keys); + + for (key_num = 0; key_num < num_of_keys; key_num++) { + KEY* key; + + key = &(key_info[key_num]); + + /* Check that the same index name does not appear + twice in indexes to be created. */ + + for (ulint i = 0; i < key_num; i++) { + KEY* key2; + + key2 = &key_info[i]; + + if (0 == strcmp(key->name, key2->name)) { + ut_print_timestamp(stderr); + + fputs(" InnoDB: Error: index ", stderr); + ut_print_name(stderr, trx, FALSE, key->name); + fputs(" appears twice in create index\n", + stderr); + + error = ER_WRONG_NAME_FOR_INDEX; + + return(error); + } + } + + /* Check that MySQL does not try to create a column prefix index + field on an inappropriate data type and that the same colum does + not appear twice in the index. */ + + for (ulint i = 0; i < key->key_parts; i++) { + KEY_PART_INFO* key_part1; + ulint col_type; /* Column type */ + + key_part1 = key->key_part + i; + + field = key_part1->field; + + col_type = get_innobase_type_from_mysql_type( + &is_unsigned, field); + + if (DATA_BLOB == col_type + || (key_part1->length < field->pack_length() + && field->type() != MYSQL_TYPE_VARCHAR) + || (field->type() == MYSQL_TYPE_VARCHAR + && key_part1->length < field->pack_length() + - ((Field_varstring*)field)->length_bytes)) { + + if (col_type == DATA_INT + || col_type == DATA_FLOAT + || col_type == DATA_DOUBLE + || col_type == DATA_DECIMAL) { + fprintf(stderr, +"InnoDB: error: MySQL is trying to create a column prefix index field\n" +"InnoDB: on an inappropriate data type. 
Table name %s, column name %s.\n", + innodb_table->name, + field->field_name); + + error = ER_WRONG_KEY_COLUMN; + } + } + + for (ulint j = 0; j < i; j++) { + KEY_PART_INFO* key_part2; + + key_part2 = key->key_part + j; + + if (0 == strcmp( + key_part1->field->field_name, + key_part2->field->field_name)) { + + ut_print_timestamp(stderr); + + fputs(" InnoDB: Error: column ", + stderr); + + ut_print_name(stderr, trx, FALSE, + key_part1->field->field_name); + + fputs(" appears twice in ", stderr); + + ut_print_name(stderr, trx, FALSE, + key->name); + fputs("\n" +" InnoDB: This is not allowed in InnoDB.\n", + stderr); + + error = ER_WRONG_KEY_COLUMN; + + return(error); + } + } + } + + } + + return(error); +} + +/*********************************************************************** +Create index field definition for key part */ +static +merge_index_field_t* +innobase_create_index_field_def( +/*============================*/ + /* out: Index field definition for + this key part */ + KEY_PART_INFO* key_part, /* in: Key definition */ + mem_heap_t* heap) /* in: heap where memory is allocated */ +{ + Field* field; + merge_index_field_t* index_field; + ibool is_unsigned; + ulint col_type; + ulint len; + + DBUG_ENTER("innobase_create_index_field_def"); + + ut_a(key_part && heap); + + index_field = (merge_index_field_t*) mem_heap_alloc_noninline( + heap, + sizeof(merge_index_field_t)); + + ut_a(index_field); + + field = key_part->field; + ut_a(field); + + col_type = get_innobase_type_from_mysql_type(&is_unsigned, field); + + index_field->col_type = col_type; + + if (DATA_BLOB == col_type + || (key_part->length < field->pack_length() + && field->type() != MYSQL_TYPE_VARCHAR) + || (field->type() == MYSQL_TYPE_VARCHAR + && key_part->length < field->pack_length() + - ((Field_varstring*)field)->length_bytes)) { + + index_field->prefix_len = key_part->length; + } else { + index_field->prefix_len = 0; + } + + len = strlen(field->field_name) + 1; + index_field->field_name = (char *)mem_heap_alloc_noninline(heap, len); + memcpy(index_field->field_name, field->field_name, len); + + DBUG_RETURN(index_field); +} + +/*********************************************************************** +Create index definition for key */ +static +merge_index_def_t* +innobase_create_index_def( +/*======================*/ + /* out: Index definition */ + KEY* key, /* in: key definition */ + mem_heap_t* heap) /* in: heap where memory is allocated */ +{ + ulint len; + merge_index_def_t* index; + ulint n_fields = key->key_parts; + + DBUG_ENTER("innobase_create_index_def"); + + ut_a(key && heap); + + index = (merge_index_def_t*) mem_heap_alloc_noninline( + heap, sizeof(merge_index_def_t)); + + index->fields = (merge_index_field_t**) mem_heap_alloc_noninline( + heap, sizeof(merge_index_field_t*) * n_fields); + + index->ind_type = 0; + index->n_fields = n_fields; + len = strlen(key->name) + 2; + index->name = (char *)mem_heap_alloc_noninline(heap, len); + + --len; + + if (my_strcasecmp(system_charset_info, key->name, "PRIMARY")) { + *index->name = TEMP_TABLE_PREFIX; + memcpy(index->name + 1, key->name, len); + } else { + memcpy(index->name, key->name, len); + } + + if (key->flags & HA_NOSAME) { + index->ind_type = index->ind_type | DICT_UNIQUE; + } + + if (!my_strcasecmp(system_charset_info, key->name, "PRIMARY")) { + index->ind_type = index->ind_type | DICT_CLUSTERED; + } + + for (ulint i = 0; i < n_fields; i++) { + KEY_PART_INFO* key_part; + + key_part = key->key_part + i; + index->fields[i] = innobase_create_index_field_def( + 
key_part, heap); + } + + DBUG_RETURN(index); +} + +/*********************************************************************** +Copy index field definition */ +static +merge_index_field_t* +innobase_copy_index_field_def( +/*==========================*/ + /* out: Index field definition for + this index */ + dict_field_t* field, /* in: Index definition to copy*/ + mem_heap_t* heap) /* in: heap where memory is allocated */ +{ + merge_index_field_t* index_field; + ulint len; + + DBUG_ENTER("innobase_copy_index_field_def"); + + ut_a(field && heap); + + index_field = (merge_index_field_t*) mem_heap_alloc_noninline( + heap, + sizeof(merge_index_field_t)); + + index_field->col_type = (field->col->prtype & 0xFFUL); + len = strlen(field->name) + 1; + index_field->field_name = (char *)mem_heap_alloc_noninline(heap, len); + memcpy(index_field->field_name, field->name, len); + index_field->prefix_len = field->prefix_len; + + DBUG_RETURN(index_field); +} + +/*********************************************************************** +Copy index definition for the index */ +static +merge_index_def_t* +innobase_copy_index_def( +/*====================*/ + /* out: Index definition */ + dict_index_t* index, /* in: index definition to copy */ + mem_heap_t* heap) /* in: heap where memory is allocated */ +{ + merge_index_def_t* new_index; + ulint n_fields; + ulint i; + ulint len; + + DBUG_ENTER("innobase_copy_index_def"); + + ut_a(index && heap); + + if (!(index->type & DICT_CLUSTERED)) { + /* Note that from the secondary index we take only + those fields that user defined to be in the index. + In the internal representation more colums were + added and those colums are not copied.*/ + + n_fields = index->n_user_defined_cols; + } else { + n_fields = index->n_fields; + } + + new_index = (merge_index_def_t*) mem_heap_alloc_noninline( + heap, sizeof(merge_index_def_t)); + + new_index->fields = (merge_index_field_t**) mem_heap_alloc_noninline( + heap, sizeof(merge_index_field_t*) * n_fields); + + new_index->ind_type = index->type; + new_index->n_fields = n_fields; + len = strlen(index->name) + 1; + new_index->name = (char *)mem_heap_alloc_noninline(heap, len); + memcpy(new_index->name, index->name, len); + + for (i = 0; i < n_fields; i++) { + dict_field_t* field = ((index->fields) + i); + + new_index->fields[i] = innobase_copy_index_field_def( + field, heap); + } + + DBUG_RETURN(new_index); +} + +/*********************************************************************** +Create an index table where indexes are ordered as follows: + +IF a primary key is defined for the table THEN + + 1) New primary key + 2) Original unique secondary indexes + 3) New unique secondary indexes + 4) Original secondary indexes + 5) New secondary indexes + +ELSE + + 1) All new indexes in the order they arrive from MySQL + +ENDIF + +*/ +static +merge_index_def_t** +innobase_create_key_def( +/*====================*/ + /* out: key definitions or NULL */ + trx_t* trx, /* in: trx */ + dict_table_t* table, /* in: table definition */ + mem_heap_t* heap, /* in: heap where space for key + definitions are allocated */ + KEY* key_info, /* in: Indexes to be created */ + ulint* n_keys) /* in/out: Number of indexes to + be created */ +{ + ulint n_indexes; /* Number of indexes */ + merge_index_def_t** indexdef; /* Index definition */ + merge_index_def_t** indexdefs; /* Index definitions */ + + DBUG_ENTER("innobase_create_key_def"); + + ut_a(trx && table && heap && key_info && n_keys && *n_keys); + + /* We do not need to count the original primary key */ + 
n_indexes = *n_keys + UT_LIST_GET_LEN(table->indexes) - 1; + + indexdef = indexdefs = (merge_index_def_t**) mem_heap_alloc_noninline( + heap, sizeof(merge_index_def_t*) * n_indexes); + + /* Primary key if defined is always the first index defined for + the table */ + + if (!my_strcasecmp(system_charset_info, key_info->name, "PRIMARY")) { + dict_index_t* index; + + /* Create the PRIMARY key index definition */ + *indexdef = innobase_create_index_def(key_info, heap); + + row_mysql_lock_data_dictionary(trx); + + /* Skip the clustered index */ + + index = dict_table_get_next_index_noninline( + dict_table_get_first_index_noninline(table)); + + /* Copy the definitions of all the secondary indexes */ + + ++indexdef; /* Since we've copied the primary key info */ + *n_keys = 1; + + while (index) { + + ut_a(!(index->type & DICT_CLUSTERED)); + + *indexdef = innobase_copy_index_def(index, heap); + index = dict_table_get_next_index_noninline(index); + + ++*n_keys; + ++indexdef; + } + + row_mysql_unlock_data_dictionary(trx); + + } else { + ulint i = 0; + + /* Create definitions for new unique secondary indexes */ + + for (KEY* key = key_info; i < *n_keys; ++key, ++i, ++indexdef) { + *indexdef = innobase_create_index_def(key, heap); + } + } + + DBUG_RETURN(indexdefs); +} + +/*********************************************************************** +Create a temporary tablename using query id, thread id, and id */ +char* +innobase_create_temporary_tablename( +/*================================*/ + /* out: New temporary tablename */ + THD* thd, /* in: User thread */ + const char* table_name, /* in: Old table name */ + ulint id) /* in: id */ +{ + char* new_name; + ulint old_len; + ulint new_len; + + old_len = strlen(table_name); + new_len = old_len + 1 + 64; + + new_name = (char *)mem_alloc_noninline(new_len); + + sprintf(new_name,"%c%s-%lx_%lx", + TEMP_TABLE_PREFIX, table_name, thd->thread_id, id); + + fprintf(stderr, "NEW TABLE NAME: %s\n", new_name); + + return(new_name); +} + +/*********************************************************************** +Add a new index to a table */ + +int +ha_innobase::add_index( +/*===================*/ + /* out: 0 or error number */ + TABLE* table, /* in: Table where indexes are created */ + KEY* key_info, /* in: Indexes to be created */ + uint num_of_keys) /* in: Number of indexes to be created */ +{ + dict_index_t** index; /* Index to be created */ + dict_table_t* innodb_table; /* InnoDB table in dictionary */ + dict_table_t* indexed_table; /* Table where indexes are created */ + merge_index_def_t** index_defs; /* Index definitions */ + mem_heap_t* heap; /* Heap for index definitions */ + trx_t* trx; /* Transaction */ + trx_t* parent_trx; + ulint num_of_idx; + ulint num_created; + ibool dict_locked = FALSE; + ibool new_primary = FALSE; + ibool new_unique = FALSE; + ulint error = DB_SUCCESS;/* DB_SUCCESS or error code */ + + DBUG_ENTER("add_index"); + ut_a(table && key_info && num_of_keys); + + index = NULL; + + parent_trx = check_trx_exists(ht, current_thd); + trx_search_latch_release_if_reserved(parent_trx); + + trx = parent_trx; + ut_a(trx); + + trx_start_if_not_started_noninline(trx); + + trx->mysql_thd = current_thd; + trx->mysql_query_str = &((*current_thd).query); + + if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { + trx->check_foreigns = FALSE; + } + + if (current_thd->options & OPTION_RELAXED_UNIQUE_CHECKS) { + trx->check_unique_secondary = FALSE; + } + + indexed_table = dict_table_get(prebuilt->table->name, FALSE); + innodb_table = indexed_table; + + 
/* Check that index keys are sensible */ + + error = innobase_check_index_keys( + table, innodb_table, trx, key_info, num_of_keys); + + if (error != DB_SUCCESS) { + trx_general_rollback_for_mysql(trx, FALSE, NULL); + + error = convert_error_code_to_mysql(error, current_thd); + + DBUG_RETURN((int)error); + } + + /* Create table containing all indexes to be built in this + alter table add index so that they are in the correct order + in the table. */ + + num_of_idx = num_of_keys; + + heap = mem_heap_create_noninline(1024); + + index_defs = innobase_create_key_def( + trx, innodb_table, heap, key_info, &num_of_idx); + + /* If a new primary key is defined for the table we need + to drop all original secondary indexes from the table. These + indexes will be rebuilt below. */ + + if (index_defs[0]->ind_type & DICT_CLUSTERED) { + char* new_table_name; + + new_primary = TRUE; + + new_table_name = innobase_create_temporary_tablename( + current_thd, (const char *)innodb_table->name, 17431); + + row_mysql_lock_data_dictionary(trx); + + /* Clone table and write UNDO log record */ + indexed_table = row_merge_create_temporary_table( + new_table_name, innodb_table, trx, &error); + + row_mysql_unlock_data_dictionary(trx); + + mem_free_noninline(new_table_name); + + if (error != DB_SUCCESS) { + mem_heap_free_noninline(heap); + trx_general_rollback_for_mysql(trx, FALSE, NULL); + error = convert_error_code_to_mysql(error, current_thd); + + DBUG_RETURN((int)error); + } + } else if (!trx->dict_redo_list) { + dict_redo_create_list(trx); + + ut_a(!trx->sync_cb); + trx->sync_cb = dict_rename_indexes; + } + + /* Allocate memory for dictionary index definitions */ + + index = (dict_index_t**) mem_alloc_noninline( + sizeof(dict_index_t*) * num_of_idx); + + /* Latch the InnoDB data dictionary exclusively so that no deadlocks + or lock waits can happen in it during an index create operation. + Drop table etc. do this latching in row0mysql.c. 
*/ + + row_mysql_lock_data_dictionary(trx); + + num_created = 0; + dict_locked = TRUE; + + /* Create the indexes in SYS_INDEXES and load into dictionary.*/ + + for (ulint i = 0; i < num_of_idx && error == DB_SUCCESS; i++) { + + error = row_merge_create_index( + trx, &index[i], indexed_table, index_defs[i]); + + if (index_defs[i]->ind_type & DICT_UNIQUE) { + + new_unique = TRUE; + } + + num_created++; + } + + if (error == DB_SUCCESS) { + /* Raise version number of the table to track this table's + definition changes.*/ + + indexed_table->version_number++; + + row_mysql_unlock_data_dictionary(trx); + dict_locked = FALSE; + + /* Free index definition table */ + mem_heap_free_noninline(heap); + heap = 0; + + ut_a(trx->n_active_thrs == 0); + ut_a(UT_LIST_GET_LEN(trx->signals) == 0); + + error = row_lock_table_for_merge(trx, innodb_table, LOCK_X); + } + + /* Set an exclusive table lock for the new table if a primary + key is to be built.*/ + if (error == DB_SUCCESS && new_primary) { + + ut_ad(indexed_table != innodb_table); + + error = row_lock_table_for_merge(trx, indexed_table, LOCK_X); + } + + if (error == DB_SUCCESS) { + + /* Read clustered index of the table and build indexes + based on this information using temporary files and merge + sort.*/ + error = row_build_index_for_mysql( + trx, innodb_table, indexed_table, index, new_primary, + num_of_idx); + } + +#ifdef UNIV_DEBUG + /* TODO: At the moment we can't handle the following statement + in our debugging code below: + + alter table t drop index b, add index (b); + + The fix will have to parse the SQL and note that the index + being added has the same name as the the one being dropped and + ignore that in the dup index check.*/ + //dict_table_check_for_dup_indexes(prebuilt->table); +#endif + + /* Free index definition table */ + if (heap) { + mem_heap_free_noninline(heap); + heap = 0; + } + + if (dict_locked) { + row_mysql_unlock_data_dictionary(trx); + } + + /* After a error, remove all those index definitions from the + dictionary which were defined.*/ + + if (error != DB_SUCCESS) { + if (error == DB_DUPLICATE_KEY) { + prebuilt->trx->error_info = NULL; + prebuilt->trx->error_key_num = trx->error_key_num; + } + + row_remove_indexes_for_mysql( + trx, indexed_table, index, num_created); + } + + if (index) { + mem_free_noninline(index); + index = 0; + } + + /* If a new primary key was defined for the table and + there was no error at this point, we can now rename the + old table as a temporary table, rename the new temporary + table as a old table and drop the old table. 
*/ + + if (new_primary == TRUE && error == DB_SUCCESS) { + char* old_name; + char* tmp_table_name; + + old_name = (char *)mem_alloc_noninline( + strlen(innodb_table->name) + 1); + + strcpy(old_name, innodb_table->name); + + tmp_table_name = innobase_create_temporary_tablename( + current_thd, (const char *)innodb_table->name, 232125); + + trx_start_if_not_started_noninline(trx); + + /* Write entry for UNDO */ + error = row_undo_report_rename_table_dict_operation( + trx, old_name, indexed_table->name, tmp_table_name); + + if (error == DB_SUCCESS) { + log_buffer_flush_to_disk(); + + /* Set the commit flag to FALSE, we will commit the + transaction ourselves, required for UNDO */ + error = innobase_rename_table( + trx, innodb_table->name, tmp_table_name, FALSE); + + if (error == DB_SUCCESS) { + error = innobase_rename_table( + trx, indexed_table->name, old_name, + FALSE); + } + + row_prebuilt_free(prebuilt); + prebuilt = row_create_prebuilt(indexed_table); + + row_mysql_lock_data_dictionary(trx); + prebuilt->table->n_mysql_handles_opened++; + row_mysql_unlock_data_dictionary(trx); + + /* Drop the old table iff there are no views that + refer to the old table. If there are views that refer + to the old table then we will drop the table when + we free the prebuilts and there are no more references + to it. */ + error = row_merge_drop_table(trx, innodb_table); + } + + mem_free_noninline(old_name); + mem_free_noninline(tmp_table_name); + } + + /* There might be work for utility threads.*/ + srv_active_wake_master_thread(); + + error = convert_error_code_to_mysql(error, current_thd); + + DBUG_RETURN((int)error); +} + +/*********************************************************************** +Drop a index from a table */ + +int +ha_innobase::prepare_drop_index( +/*============================*/ + /* out: 0 or error number */ + TABLE* table, /* in: Table where indexes are dropped */ + uint* key_num, /* in: Key nums to be dropped */ + uint num_of_keys) /* in: Number of keys to be dropped */ +{ + trx_t* trx; + THD* thd; + ulint error = DB_SUCCESS; + + DBUG_ENTER("prepare_drop_index"); + ut_ad(table && key_num && num_of_keys); + + thd = current_thd; + + /* Create a new transaction for prepare index drop if it + does not exists */ + + trx = check_trx_exists(ht, current_thd); + trx_search_latch_release_if_reserved(trx); + + if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { + trx->check_foreigns = FALSE; + } + + if (current_thd->options & OPTION_RELAXED_UNIQUE_CHECKS) { + trx->check_unique_secondary = FALSE; + } + + /* Test and mark all the indexes to be dropped */ + + row_mysql_lock_data_dictionary(trx); + + for (ulint n_key = 0; n_key < num_of_keys; n_key++) { + KEY* key; + dict_index_t* index; + + key = table->key_info + key_num[n_key]; + ut_a(key); + + index = NULL; + + if (key) { + index = dict_table_get_index_on_name_and_min_id( + prebuilt->table, + key->name); + } + + if (!index) { + sql_print_error("InnoDB could not find key n:o %u " + "with name %s from dict cache for table %s", + key_num[n_key], key ? key->name : "NULL", + prebuilt->table->name); + + error = 1; + goto func_exit; + } + + index->to_be_dropped = TRUE; + } + + /* We check for the foreign key constraints after marking the + candidate indexes for deletion because when we check for an + equivalent foreign index we don't want to select an index that is + later deleted. 
*/ + for (ulint n_key = 0; n_key < num_of_keys; n_key++) { + KEY* key; + dict_index_t* index; + + key = table->key_info + key_num[n_key]; + + index = dict_table_get_index_on_name_and_min_id( + prebuilt->table, key->name); + + ut_a(index); + ut_a(index->to_be_dropped); + + /* If FOREIGN_KEY_CHECK = 1 you may not drop an index + defined for a foreign key constraint because + InnoDB requires that both tables contain indexes + for the constraint. Note that create index id on table + does a create index and drop index and we can ignore + here foreign keys because a new index for the foreign + key has already been created. */ + + if (trx->check_foreigns + && thd->lex->sql_command != SQLCOM_CREATE_INDEX) { + dict_foreign_t* foreign; + ibool ok_to_delete = TRUE; + + /* Check if this index is referenced by some other + table */ + foreign = dict_table_get_referenced_constraint( + prebuilt->table, index); + + if (foreign) { + + ok_to_delete = FALSE; + + } else { + + /* Check if this index references some + other table */ + foreign = dict_table_get_foreign_constraint( + prebuilt->table, index); + + if (foreign) { + ut_a(foreign->foreign_index == index); + + /* Search for an equivalent index that + the foreign key contraint could use + if this index were to be deleted. */ + if (!dict_table_find_equivalent_index( + prebuilt->table, + foreign->foreign_index)) { + + ok_to_delete = FALSE; + } + } + } + + if (!ok_to_delete) { + + trx_set_detailed_error( + trx, + "Index needed in foreign key " + "constraint"); + + trx->error_info = index; + + FILE* ef = dict_foreign_err_file; + + error = DB_CANNOT_DROP_FOREIGN_INDEX; + + mutex_enter_noninline(&dict_foreign_err_mutex); + rewind(ef); + ut_print_timestamp(ef); + + fputs(" Cannot drop index ", ef); + ut_print_name(ef, trx, FALSE, index->name); + fputs("\nbecause it is referenced by ", ef); + ut_print_name(ef, trx, TRUE, + foreign->foreign_table_name); + putc('\n', ef); + mutex_exit_noninline(&dict_foreign_err_mutex); + + break; + } + } + } + +func_exit: + if (error != DB_SUCCESS) { + /* Undo our changes since there was some sort of error */ + for (ulint i = 0; i < num_of_keys; i++) { + KEY* key; + dict_index_t* index; + + key = table->key_info + key_num[i]; + ut_a(key); + + index = dict_table_get_index_on_name_and_min_id( + prebuilt->table, key->name); + + if (index) { + index->to_be_dropped = FALSE; + } + } + } + + row_mysql_unlock_data_dictionary(trx); + + error = convert_error_code_to_mysql(error, current_thd); + + DBUG_RETURN((int)error); +} + +/*********************************************************************** +Finalize a drop index */ + +int +ha_innobase::final_drop_index( +/*==========================*/ + /* out: 0 or error number */ + TABLE* table) /* in: Table where indexes are dropped */ +{ + dict_index_t* index; /* Index to be dropped */ + trx_t* trx; /* Transaction */ + ulint error = DB_SUCCESS;/* DB_SUCCESS or error code */ + + DBUG_ENTER("final_drop_index"); + ut_ad(table); + + /* Create a new transaction for final index drop if it does not + * exits*/ + + trx = check_trx_exists(ht, current_thd); + trx_search_latch_release_if_reserved(trx); + + /* Drop indexes marked to be dropped */ + + row_mysql_lock_data_dictionary(trx); + + index = dict_table_get_first_index_noninline(prebuilt->table); + + while (index && error == DB_SUCCESS) { + dict_index_t* next_index; + + next_index = dict_table_get_next_index_noninline(index); + + if (index->to_be_dropped == TRUE) { + + error = row_merge_remove_index( + index, prebuilt->table, trx); + } + + index = 
next_index; + } + + prebuilt->table->version_number++; + +#ifdef UNIV_DEBUG + dict_table_check_for_dup_indexes(prebuilt->table); +#endif + + row_mysql_unlock_data_dictionary(trx); + + /* Flush the log to reduce probability that the .frm files and + the InnoDB data dictionary get out-of-sync if the user runs + with innodb_flush_log_at_trx_commit = 0 */ + + log_buffer_flush_to_disk(); + +#ifdef UNIV_DEBUG_INDEX_CREATE + row_merge_print_table(prebuilt->table); +#endif + + /* Tell the InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + trx_commit_for_mysql(trx); + + error = convert_error_code_to_mysql(error, current_thd); + + DBUG_RETURN((int) error); +} + /*********************************************************************** This function is used to rollback one X/Open XA distributed transaction which is in the prepared state */ @@ -7630,24 +8742,24 @@ bool ha_innobase::check_if_incompatible_data( { if (table_changes != IS_EQUAL_YES) { - return COMPATIBLE_DATA_NO; + return(COMPATIBLE_DATA_NO); } /* Check that auto_increment value was not changed */ if ((info->used_fields & HA_CREATE_USED_AUTO) && info->auto_increment_value != 0) { - return COMPATIBLE_DATA_NO; + return(COMPATIBLE_DATA_NO); } /* Check that row format didn't change */ if ((info->used_fields & HA_CREATE_USED_AUTO) && get_row_type() != info->row_type) { - return COMPATIBLE_DATA_NO; + return(COMPATIBLE_DATA_NO); } - return COMPATIBLE_DATA_YES; + return(COMPATIBLE_DATA_YES); } /*********************************************************************** diff --git a/handler/ha_innodb.h b/handler/ha_innodb.h index f7ecec969ab..b68b891db39 100644 --- a/handler/ha_innodb.h +++ b/handler/ha_innodb.h @@ -85,13 +85,9 @@ class ha_innobase: public handler const char *index_type(uint key_number) { return "BTREE"; } const char** bas_ext() const; ulonglong table_flags() const { return int_table_flags; } - ulong index_flags(uint idx, uint part, bool all_parts) const - { - return (HA_READ_NEXT | - HA_READ_PREV | - HA_READ_ORDER | - HA_READ_RANGE | - HA_KEYREAD_ONLY); + ulong index_flags(uint idx, uint part, bool all_parts) const { + return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER + | HA_READ_RANGE | HA_KEYREAD_ONLY); } uint max_supported_keys() const { return MAX_KEY; } /* An InnoDB page must store >= 2 keys; @@ -117,6 +113,7 @@ class ha_innobase: public handler void try_semi_consistent_read(bool yes); void unlock_row(); + bool is_index_available(uint index); int index_init(uint index, bool sorted); int index_end(); int index_read(byte * buf, const byte * key, @@ -185,6 +182,10 @@ class ha_innobase: public handler static ulonglong get_mysql_bin_log_pos(); bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys); + int prepare_drop_index(TABLE *table_arg, uint *key_num, + uint num_of_keys); + int final_drop_index(TABLE *table_arg); bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); }; diff --git a/include/data0data.ic b/include/data0data.ic index 5ea5d8dd373..f9a9eaf92b4 100644 --- a/include/data0data.ic +++ b/include/data0data.ic @@ -279,8 +279,13 @@ dtuple_create( ulint i; for (i = 0; i < n_fields; i++) { - (tuple->fields + i)->data = &data_error; - dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR; + dfield_t* field; + + field = dtuple_get_nth_field(tuple, i); + + dfield_set_len(field, UNIV_SQL_NULL); + field->data = &data_error; + 
dfield_get_type(field)->mtype = DATA_ERROR; } } #endif diff --git a/include/db0err.h b/include/db0err.h index 12dc622618a..3033f0c75b3 100644 --- a/include/db0err.h +++ b/include/db0err.h @@ -62,6 +62,9 @@ Created 5/24/1996 Heikki Tuuri activated by the operation would lead to a duplicate key in some table */ +#define DB_CANNOT_DROP_FOREIGN_INDEX 47 /* we cannot drop an index because + it is needed on foreign key + constraint */ /* The following are partial failure codes */ #define DB_FAIL 1000 diff --git a/include/dict0dict.h b/include/dict0dict.h index 3d03d0216b2..b3f86f33f5c 100644 --- a/include/dict0dict.h +++ b/include/dict0dict.h @@ -53,6 +53,15 @@ dict_remove_db_name( /* out: table name */ const char* name); /* in: table name in the form dbname '/' tablename */ +/************************************************************************** +Returns a table object based on table id. */ + +dict_table_t* +dict_table_get_on_id( +/*=================*/ + /* out: table, NULL if does not exist */ + dulint table_id, /* in: table id */ + trx_t* trx); /* in: transaction handle */ /************************************************************************ Decrements the count of open MySQL handles to a table. */ @@ -248,6 +257,14 @@ dict_table_rename_in_cache( to preserve the original table name in constraints which reference it */ /************************************************************************** +Removes an index from the dictionary cache. */ + +void +dict_index_remove_from_cache( +/*=========================*/ + dict_table_t* table, /* in: table */ + dict_index_t* index); /* in, own: index */ +/************************************************************************** Change the id of a table object in the dictionary cache. This is used in DISCARD TABLESPACE. */ @@ -270,14 +287,34 @@ dict_foreign_add_to_cache( ibool check_charsets);/* in: TRUE=check charset compatibility */ /************************************************************************* +Check if the index is referenced by a foreign key, if TRUE return the +matching instance NULL otherwise. */ + +dict_foreign_t* +dict_table_get_referenced_constraint( +/*=================================*/ + /* out: pointer to foreign key struct if index + is defined for foreign key, otherwise NULL */ + dict_table_t* table, /* in: InnoDB table */ + dict_index_t* index); /* in: InnoDB index */ +/************************************************************************* Checks if a table is referenced by foreign keys. */ ibool -dict_table_referenced_by_foreign_key( -/*=================================*/ - /* out: TRUE if table is referenced by a - foreign key */ - dict_table_t* table); /* in: InnoDB table */ +dict_table_is_referenced_by_foreign_key( +/*====================================*/ + /* out: TRUE if table is referenced + by a foreign key */ + const dict_table_t* table); /* in: InnoDB table */ +/************************************************************************** +Replace the index in the foreign key list that matches this index's +definition with an equivalent index. */ + +void +dict_table_replace_index_in_foreign_list( +/*=====================================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index); /* in: index to be replaced */ /************************************************************************** Determines whether a string starts with the specified keyword. 
*/ @@ -290,6 +327,18 @@ dict_str_starts_with_keyword( const char* str, /* in: string to scan for keyword */ const char* keyword); /* in: keyword to look for */ /************************************************************************* +Checks if a index is defined for a foreign key constraint. Index is a part +of a foreign key constraint if the index is referenced by foreign key +or index is a foreign key index */ + +dict_foreign_t* +dict_table_get_foreign_constraint( +/*==============================*/ + /* out: pointer to foreign key struct if index + is defined for foreign key, otherwise NULL */ + dict_table_t* table, /* in: InnoDB table */ + dict_index_t* index); /* in: InnoDB index */ +/************************************************************************* Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. @@ -350,24 +399,18 @@ dict_table_get( /* in: whether to increment the open handle count on the table */ /************************************************************************** -Returns a table object based on table id. */ +Returns a index object, based on table and index id, and memoryfixes it. */ -dict_table_t* -dict_table_get_on_id( -/*=================*/ - /* out: table, NULL if does not exist */ - dulint table_id, /* in: table id */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************** -Returns a table object based on table id. */ -UNIV_INLINE -dict_table_t* -dict_table_get_on_id_low( +dict_index_t* +dict_index_get_on_id_low( /*=====================*/ - /* out: table, NULL if does not exist */ - dulint table_id); /* in: table id */ + /* out: index, NULL if does not + exist */ + dict_table_t* table, /* in: table */ + dulint index_id); /* in: index id */ /************************************************************************** Checks if a table is in the dictionary cache. */ + UNIV_INLINE dict_table_t* dict_table_check_if_in_cache_low( @@ -384,6 +427,14 @@ dict_table_get_low( /* out: table, NULL if not found */ const char* table_name); /* in: table name */ /************************************************************************** +Returns a table object based on table id. */ +UNIV_INLINE +dict_table_t* +dict_table_get_on_id_low( +/*=====================*/ + /* out: table, NULL if does not exist */ + dulint table_id); /* in: table id */ +/************************************************************************** A noninlined version of dict_table_get_low. */ dict_table_t* @@ -392,23 +443,17 @@ dict_table_get_low_noninlined( /* out: table, NULL if not found */ const char* table_name); /* in: table name */ /************************************************************************** -Returns an index object. */ -UNIV_INLINE -dict_index_t* -dict_table_get_index( -/*=================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name); /* in: index name */ -/************************************************************************** -Returns an index object. 
*/ +Returns an index object by matching on the name and column names and if +more than index is found return the index with the higher id.*/ dict_index_t* -dict_table_get_index_noninline( +dict_table_get_index_by_max_id( /*===========================*/ /* out: index, NULL if does not exist */ dict_table_t* table, /* in: table */ - const char* name); /* in: index name */ + const char* name, /* in: index name to find*/ + const char** column_names, /* in: column names to match */ + ulint n_cols);/* in: number of columns */ /************************************************************************** Returns a column's name. */ @@ -855,6 +900,14 @@ dict_index_check_search_tuple( /* out: TRUE if ok */ dict_index_t* index, /* in: index */ const dtuple_t* tuple); /* in: tuple used in a search */ +/************************************************************************** +Check for duplicate index entries in a table [using the index name] */ + +void +dict_table_check_for_dup_indexes( +/*=============================*/ + dict_table_t* table); /* in: Check for dup indexes in this table */ + #endif /* UNIV_DEBUG */ /************************************************************************** Builds a node pointer out of a physical record and a page number. */ @@ -1012,6 +1065,118 @@ dict_scan_to( /* out: scanned up to this */ const char* ptr, /* in: scan from */ const char* string);/* in: look for this */ +/************************************************************************* +Removes an index from the cache */ + +void +dict_index_remove_from_cache( +/*=========================*/ + dict_table_t* table, /* in: table */ + dict_index_t* index); /* in, own: index */ +/************************************************************************** +Get index by name */ + +dict_index_t* +dict_table_get_index_on_name( +/*=========================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name); /* in: name of the index to find */ +/************************************************************************** +Find and index that is equivalent to the one passed in and is not marked +for deletion. */ + +dict_index_t* +dict_table_find_equivalent_index( +/*=============================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index); /* in: index to match */ +/************************************************************************** +Find and return an index in the table that matches the index id.*/ + +dict_index_t* +dict_table_get_index_on_id_noninline( +/*=================================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + dulint index_id);/* in: table id */ +/************************************************************************** +In case there is more than one index with the same name return the index +with the min(id). */ + +dict_index_t* +dict_table_get_index_on_name_and_min_id( +/*====================================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name); /* in: name of the index to find */ +/************************************************************************** +Create and return an undo list. */ + +void +dict_undo_create_list( +/*==================*/ + trx_t* trx); /* in: create undo list for this trx.*/ +/************************************************************************** +Create element of the undo list and append to the passed in list. 
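As an illustration only (an editorial sketch, not code from this patch): a caller that has just created an index could record the operation on the transaction's undo list roughly as below, assuming the dict_undo_create_list()/dict_undo_create_element() declarations here and the dict_undo_struct / dict_undo_data_union fields from dict0types.h; whether the list is created exactly once per transaction is an assumption.

	dict_undo_t*	undo;

	dict_undo_create_list(trx);	/* assumed: done once per trx */
	undo = dict_undo_create_element(trx);

	undo->op_type = TRX_UNDO_INDEX_CREATE_REC;
	undo->data.index = index;	/* index to drop again on rollback */
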
*/ + +dict_undo_t* +dict_undo_create_element( +/*=====================*/ /* out: dict_undo_t element*/ + trx_t* trx); /* in: create & add elem to this trx.*/ +/************************************************************************** +Free all the nodes on the undo list and free list.*/ + +void +dict_undo_free_list( +/*================*/ + trx_t* trx); /* in: free this trx's undo list */ +/************************************************************************** +Create and return a redo list. */ + +void +dict_redo_create_list( +/*==================*/ + trx_t* trx); /* in: create redo list for this trx.*/ +/************************************************************************** +Create element of the redo list and append to the passed in transaction. */ + +dict_redo_t* +dict_redo_create_element( +/*=====================*/ /* out: dict_redo_t element*/ + trx_t* trx); /* in: create & add elem to this trx.*/ +/************************************************************************** +Free all the nodes on the redo list and free list.*/ + +void +dict_redo_free_list( +/*================*/ + trx_t* trx); /* in: free this trx's redo list */ +/************************************************************************** +Add the indexes to SYS_INDEX.*/ + +ulint +dict_rename_indexes( +/*================*/ + trx_t* trx,/* in: transaction */ + ibool commit_flag); /* in: ignored for now */ +/************************************************************************** +Remove the index from the transaction's REDO list.*/ + +void +dict_redo_remove_index( +/*===================*/ + trx_t* trx, /* in: transaction */ + dict_index_t* index); /* in: index to remove */ +/************************************************************************** +Get the index by name from the transaction's REDO list.*/ + +dict_index_t* +dict_redo_get_index_on_name( +/*========================*/ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: the table the index belongs to */ + const char* name); /* in: index name */ /* Buffers for storing detailed information about the latest foreign key and unique key errors */ extern FILE* dict_foreign_err_file; @@ -1050,6 +1215,10 @@ struct dict_sys_struct{ dict_table_t* sys_fields; /* SYS_FIELDS table */ }; +#define TEMP_TABLE_PREFIX '/' /* Table name prefix for temporary + internal tables. Used in fast index + creation etc. */ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/include/dict0dict.ic b/include/dict0dict.ic index 82eee566688..b0f111a622b 100644 --- a/include/dict0dict.ic +++ b/include/dict0dict.ic @@ -647,28 +647,3 @@ dict_table_get_on_id_low( return(table); } -/************************************************************************** -Returns an index object. 
*/ -UNIV_INLINE -dict_index_t* -dict_table_get_index( -/*=================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name) /* in: index name */ -{ - dict_index_t* index = NULL; - - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (ut_strcmp(name, index->name) == 0) { - - break; - } - - index = dict_table_get_next_index(index); - } - - return(index); -} diff --git a/include/dict0load.h b/include/dict0load.h index 9c122b0bff4..4033dbf3a08 100644 --- a/include/dict0load.h +++ b/include/dict0load.h @@ -13,6 +13,7 @@ Created 4/24/1996 Heikki Tuuri #include "univ.i" #include "dict0types.h" #include "ut0byte.h" +#include "mem0mem.h" /************************************************************************ In a crash recovery we already have all the tablespace objects created. diff --git a/include/dict0mem.h b/include/dict0mem.h index 508207af8cd..4009865d09e 100644 --- a/include/dict0mem.h +++ b/include/dict0mem.h @@ -24,6 +24,7 @@ Created 1/8/1996 Heikki Tuuri #include "lock0types.h" #include "hash0hash.h" #include "que0types.h" +#include "row0types.h" /* Type flags of an index: OR'ing of the flags is allowed to define a combination of types */ @@ -31,7 +32,8 @@ combination of types */ #define DICT_UNIQUE 2 /* unique index */ #define DICT_UNIVERSAL 4 /* index which can contain records from any other index */ -#define DICT_IBUF 8 /* insert buffer tree */ +#define DICT_IBUF 8 /* insert buffer tree */ +#define DICT_NOT_READY 16 /* this index is being build */ /* Types for a table object */ #define DICT_TABLE_ORDINARY 1 @@ -185,7 +187,7 @@ struct dict_index_struct{ dulint id; /* id of the index */ mem_heap_t* heap; /* memory heap */ ulint type; /* index type */ - const char* name; /* index name */ + char* name; /* index name */ const char* table_name; /* table name */ dict_table_t* table; /* back pointer to table */ unsigned space:32; @@ -207,6 +209,10 @@ struct dict_index_struct{ unsigned n_nullable:10;/* number of nullable fields */ unsigned cached:1;/* TRUE if the index object is in the dictionary cache */ + unsigned to_be_dropped:1; + /* TRUE if this index is marked to be + dropped in ha_innobase::prepare_drop_index(), + otherwise FALSE */ dict_field_t* fields; /* array of field descriptions */ UT_LIST_NODE_T(dict_index_t) indexes;/* list of indexes of the table */ @@ -224,6 +230,9 @@ struct dict_index_struct{ index tree */ rw_lock_t lock; /* read-write lock protecting the upper levels of the index tree */ + dulint trx_id; /* id of the transaction that created this + index. It can be zero which implies that + it was created on database startup.*/ #ifdef UNIV_DEBUG ulint magic_n;/* magic number */ # define DICT_INDEX_MAGIC_N 76789786 @@ -290,6 +299,14 @@ struct dict_table_struct{ innodb_file_per_table is defined in my.cnf; in Unix this is usually /tmp/..., in Windows \temp\... */ + unsigned version_number:32; + /* version number of this table definition. + Version number is 0 when table is created. + Every schema change implemented without + creating a new table and copying rows from + the old table to new table will increase this + number. For example adding or removing index, + adding or removing a column. 
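Purely as an illustration (a sketch, not code from this patch) of how the counter is meant to be consumed, mirroring the table_version_number field added to ins_node_struct in row0ins.h; the node->table member and the handling of the previously built list are assumptions here:

	/* Rebuild the cached entry templates when the table
	definition has changed since they were built. */
	if (node->table_version_number != node->table->version_number) {

		/* (freeing of the old entry_list omitted in this sketch) */
		ins_node_create_entry_list(node);

		node->table_version_number = node->table->version_number;
	}
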
*/ unsigned space:32; /* space where the clustered index of the table is placed */ @@ -303,6 +320,8 @@ struct dict_table_struct{ calls DISCARD TABLESPACE on this table, and reset to FALSE in IMPORT TABLESPACE */ + unsigned to_be_dropped:1; /* if set then this table will + dropped when n_mysql_handles_opened is 0 */ unsigned cached:1;/* TRUE if the table object has been added to the dictionary cache */ unsigned flags:8;/* DICT_TF_COMPACT, ... */ @@ -406,6 +425,10 @@ struct dict_table_struct{ SELECT MAX(auto inc column) */ ib_longlong autoinc;/* autoinc counter value to give to the next inserted row */ + /*----------------------*/ + UT_LIST_BASE_NODE_T(row_prebuilt_t) prebuilts; + /* base node for the prebuilts defined + for the table */ #ifdef UNIV_DEBUG ulint magic_n;/* magic number */ # define DICT_TABLE_MAGIC_N 76333786 diff --git a/include/dict0types.h b/include/dict0types.h index b90545f2105..15aacfd6d27 100644 --- a/include/dict0types.h +++ b/include/dict0types.h @@ -9,6 +9,8 @@ Created 1/8/1996 Heikki Tuuri #ifndef dict0types_h #define dict0types_h +#include "ut0list.h" + typedef struct dict_sys_struct dict_sys_t; typedef struct dict_col_struct dict_col_t; typedef struct dict_field_struct dict_field_t; @@ -24,4 +26,48 @@ typedef dict_table_t dict_cluster_t; typedef struct ind_node_struct ind_node_t; typedef struct tab_node_struct tab_node_t; +/* Data types for dict_undo */ +union dict_undo_data_union { + + dict_index_t* index; /* The index to be dropped */ + + struct { + dict_table_t* old_table; /* All fields are required only for*/ + dict_table_t* tmp_table; /*RENAME, for CREATE and DROP we */ + dict_table_t* new_table; /*use only old_table */ + } table; +}; + +typedef union dict_undo_data_union dict_undo_data_t; + +/* During recovery these are the operations that need to be undone */ +struct dict_undo_struct { + ulint op_type; /* Discriminator one of : + TRX_UNDO_INDEX_CREATE_REC, + TRX_UNDO_TABLE_DROP_REC, + TRX_UNDO_TABLE_CREATE_REC, + TRX_UNDO_TABLE_RENAME_REC.*/ + dict_undo_data_t + data; /* Data required for UNDO */ + + UT_LIST_NODE_T(struct dict_undo_struct) + node; /* UNDO list node */ +}; + +typedef struct dict_undo_struct dict_undo_t; +typedef UT_LIST_BASE_NODE_T(dict_undo_t) dict_undo_list_t; + +/* TODO: Currently this data structure is a place holder for indexes +created by a transaction.* The REDO is a misnomer*/ +struct dict_redo_struct { + ulint op_type; /* Discriminator one of : + TRX_UNDO_INDEX_CREATE_REC.*/ + dict_index_t* index; /* The index created.*/ + + UT_LIST_NODE_T(struct dict_redo_struct) + node; /* REDO list node */ +}; + +typedef struct dict_redo_struct dict_redo_t; +typedef UT_LIST_BASE_NODE_T(dict_redo_t) dict_redo_list_t; #endif diff --git a/include/mem0mem.h b/include/mem0mem.h index 9eaebaa7954..056d33f230b 100644 --- a/include/mem0mem.h +++ b/include/mem0mem.h @@ -73,6 +73,12 @@ heap creation. */ Use this macro instead of the corresponding function! Macro for memory heap creation. */ +#define mem_heap_create_noninline(N) mem_heap_create_func_noninline(\ + (N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__) +/****************************************************************** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + #define mem_heap_create_in_buffer(N) mem_heap_create_func(\ (N), MEM_HEAP_BUFFER, __FILE__, __LINE__) /****************************************************************** @@ -89,6 +95,12 @@ heap freeing. 
*/ #define mem_heap_free(heap) mem_heap_free_func(\ (heap), __FILE__, __LINE__) +/****************************************************************** +Use this macro instead of the corresponding function! Macro for memory +heap freeing. */ + +#define mem_heap_free_noninline(heap) mem_heap_free_func_noninline(\ + (heap), __FILE__, __LINE__) /********************************************************************* NOTE: Use the corresponding macros instead of this function. Creates a memory heap. For debugging purposes, takes also the file name and line as @@ -118,6 +130,37 @@ mem_heap_free_func( mem_heap_t* heap, /* in, own: heap to be freed */ const char* file_name, /* in: file name where freed */ ulint line); /* in: line where freed */ +/********************************************************************* +NOTE: Use the corresponding macros instead of this function. Creates a +memory heap. For debugging purposes, takes also the file name and line as +arguments. */ + +mem_heap_t* +mem_heap_create_func_noninline( +/*===========================*/ + /* out, own: memory heap, NULL if + did not succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps)*/ + ulint n, /* in: desired start block size, + this means that a single user buffer + of size n will fit in the block, + 0 creates a default size block; + if init_block is not NULL, n tells + its size in bytes */ + ulint type, /* in: heap type */ + const char* file_name, /* in: file name where created */ + ulint line); /* in: line where created */ +/********************************************************************* +NOTE: Use the corresponding macro instead of this function. Frees the space +occupied by a memory heap. In the debug version erases the heap memory +blocks. */ + +void +mem_heap_free_func_noninline( +/*=========================*/ + mem_heap_t* heap, /* in, own: heap to be freed */ + const char* file_name, /* in: file name where freed */ + ulint line); /* in: line where freed */ /******************************************************************* Allocates n bytes of memory from a memory heap. */ UNIV_INLINE @@ -131,6 +174,19 @@ mem_heap_alloc( ulint n); /* in: number of bytes; if the heap is allowed to grow into the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF */ +/******************************************************************* +Allocates n bytes of memory from a memory heap. */ + +void* +mem_heap_alloc_noninline( +/*=====================*/ + /* out: allocated storage, NULL if did not + succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps) */ + mem_heap_t* heap, /* in: memory heap */ + ulint n); /* in: number of bytes; if the heap is allowed + to grow into the buffer pool, this must be + <= MEM_MAX_ALLOC_IN_BUF */ /********************************************************************* Returns a pointer to the heap top. */ UNIV_INLINE @@ -193,6 +249,12 @@ Macro for memory buffer allocation */ #define mem_alloc_noninline(N) mem_alloc_func_noninline(\ (N), __FILE__, __LINE__) +/****************************************************************** +Use this macro instead of the corresponding function! +Macro for memory buffer allocation */ + +#define mem_free_noninline(N) mem_free_func_noninline(\ + (N), __FILE__, __LINE__) /******************************************************************* NOTE: Use the corresponding macro instead of this function. 
Allocates a single buffer of memory from the dynamic memory of @@ -238,6 +300,18 @@ mem_free_func( const char* file_name, /* in: file name where created */ ulint line /* in: line where created */ ); +/******************************************************************* +NOTE: Use the corresponding macro instead of this function. +Frees a single buffer of storage from +the dynamic memory of C compiler. Similar to free of C. */ + +void +mem_free_func_noninline( +/*====================*/ + void* ptr, /* in, own: buffer to be freed */ + const char* file_name, /* in: file name where created */ + ulint line /* in: line where created */ +); /************************************************************************** Duplicates a NUL-terminated string. */ diff --git a/include/os0file.h b/include/os0file.h index 5ffcdf7e58c..dbdf1890a4b 100644 --- a/include/os0file.h +++ b/include/os0file.h @@ -729,4 +729,15 @@ os_file_get_status( os_file_stat_t* stat_info); /* information of a file in a directory */ +#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__) +/************************************************************************* +Creates a temporary file that will be deleted on close. +This function is defined in ha_innodb.cc. */ + +int +innobase_mysql_tmpfile(void); +/*========================*/ + /* out: temporary file descriptor, or < 0 on error */ +#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */ + #endif diff --git a/include/row0ins.h b/include/row0ins.h index 520ce5f724d..286dce98589 100644 --- a/include/row0ins.h +++ b/include/row0ins.h @@ -93,6 +93,13 @@ row_ins_step( /*=========*/ /* out: query thread to run next or NULL */ que_thr_t* thr); /* in: query thread */ +/*************************************************************** +Creates an entry template for each index of a table. */ + +void +ins_node_create_entry_list( +/*=======================*/ + ins_node_t* node); /* in: row insert node */ /* Insert node structure */ @@ -112,6 +119,11 @@ struct ins_node_struct{ this should be reset to NULL */ UT_LIST_BASE_NODE_T(dtuple_t) entry_list;/* list of entries, one for each index */ + ulint table_version_number; + /* entry_list is created for this version + of the table. If this version is not same + as table->version_number, entry_list must + be re-created. */ byte* row_id_buf;/* buffer for the row id sys field in row */ dulint trx_id; /* trx id or the last trx which executed the node */ diff --git a/include/row0merge.h b/include/row0merge.h new file mode 100644 index 00000000000..c8a43c67fd7 --- /dev/null +++ b/include/row0merge.h @@ -0,0 +1,305 @@ +/****************************************************** +Index build routines using a merge sort + +(c) 2005 Innobase Oy + +Created 13/06/2005 Jan Lindstrom +*******************************************************/ + +#ifndef row0merge_h +#define row0merge_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "mtr0mtr.h" +#include "rem0types.h" +#include "rem0rec.h" +#include "read0types.h" +#include "btr0types.h" +#include "row0mysql.h" + +/* Block size for I/O operations in merge sort */ + +#define MERGE_BLOCK_SIZE 1048576 /* 1M */ + +/* Intentional free space on every block */ +#define MERGE_BLOCK_SAFETY_MARGIN 128 + +/* Enable faster index creation debug code */ +/* #define UNIV_DEBUG_INDEX_CREATE 1 */ + +/* This block header structure is used to create linked list of the +blocks to the disk. 
Every block contains one header.*/ + +struct merge_block_header_struct { + ulint n_records; /* Number of records in the block. */ + dulint offset; /* Offset of this block in the disk. */ + dulint next; /* Offset to next block in the disk. */ +}; + +typedef struct merge_block_header_struct merge_block_header_t; + +/* This block structure is used to hold index records in the disk +and the memory */ + +struct merge_block_struct { + merge_block_header_t header; /* Block header information */ + char data[MERGE_BLOCK_SIZE - sizeof(merge_block_header_t)];/* Data area i.e. heap */ +}; + +typedef struct merge_block_struct merge_block_t; + +/* Records are stored in the memory for main memory linked list +to this structure */ + +struct merge_rec_struct { + struct merge_rec_struct *next; /* Pointer to next record + in the list */ + rec_t* rec; /* Record */ +}; + +typedef struct merge_rec_struct merge_rec_t; + +/* This structure is head element for main memory linked list +used for main memory linked list merge sort */ + +struct merge_rec_list_struct { + merge_rec_t* head; /* Pointer to head of the + list */ + merge_rec_t* tail; /* Pointer to tail of the + list */ + ulint n_records; /* Number of records in + the list */ + ulint total_size; /* Total size of all records in + the list */ + mem_heap_t* heap; /* Heap where memory for this + list is allocated */ +}; + +typedef struct merge_rec_list_struct merge_rec_list_t; + +/* Information about temporary files used in merge sort are stored +to this structure */ + +struct merge_file_struct { + os_file_t file; /* File descriptor */ + dulint offset; /* File offset */ + ulint num_of_blocks; /* Number of blocks */ +}; + +typedef struct merge_file_struct merge_file_t; + +/* This structure holds parameters to thread which does a +disk based merge sort and inserts index records */ + +struct merge_thread_struct { + dict_index_t* index; /* in: Index to be created */ + row_prebuilt_t* prebuilt; /* in: Prebuilt */ + trx_t* trx; /* in: trx */ + os_file_t file; /* in: File handle */ + int error; /* out: error code or 0 */ +}; + +typedef struct merge_thread_struct merge_thread_t; + +/* This structure holds index field definitions */ + +struct merge_index_field_struct { + ulint col_type; /* Column type */ + ulint prefix_len; /* Prefix len */ + char* field_name; /* Field name */ +}; + +typedef struct merge_index_field_struct merge_index_field_t; + +/* This structure holds index definitions */ + +struct merge_index_def_struct { + ulint n_fields; /* Number of fields in index */ + ulint ind_type; /* 0, DICT_UNIQUE or DICT_CLUSTERED */ + char* name; /* Index name */ + merge_index_field_t** fields; /* Field definitions */ +}; + +typedef struct merge_index_def_struct merge_index_def_t; + +/************************************************************************ +Reads clustered index of the table and create temporary files +containing index entries for indexes to be built. 
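As a small worked example (editorial sketch, not part of the patch) of how the block layout above constrains record placement, assuming the writer is meant to leave MERGE_BLOCK_SAFETY_MARGIN bytes of the data area unused:

	merge_block_t	block;

	/* Payload bytes available in one on-disk block. */
	ulint	capacity = sizeof(block.data);

	/* Bytes a writer would fill before starting a new block,
	keeping the intentional safety margin free. */
	ulint	fill_limit = capacity - MERGE_BLOCK_SAFETY_MARGIN;
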
*/ + +ulint +row_merge_read_clustered_index( +/*===========================*/ + /* out: DB_SUCCESS if successful, + or ERROR code */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: table where index is created */ + dict_index_t** index, /* in: indexes to be created */ + merge_file_t** files, /* in: Files where to write index + entries */ + ulint num_of_idx); /* in: number of indexes to be + created */ +/************************************************************************ +Read a sorted file containing index data tuples and insert these +data tuples into the index */ + +ulint +row_merge_insert_index_tuples( +/*==========================*/ + /* out: 0 or error number */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: index */ + dict_table_t* table, /* in: table */ + os_file_t file, /* in: file handle */ + dulint offset); /* in: offset where to start + reading */ +/***************************************************************** +Merge sort for a linked list on disk. */ + +dulint +row_merge_sort_linked_list_in_disk( +/*===============================*/ + /* out: offset to first block in + the list or ut_dulint_max in + case of error */ + dict_index_t* index, /* in: index to be created */ + os_file_t file, /* in: File handle */ + int* error); /* out: 0 or error */ + +/************************************************************************* +Allocate and initialize memory for a merge file structure */ + +merge_file_t* +row_merge_create_file_structure( +/*============================*/ + /* out: pointer to merge file + structure */ + mem_heap_t* heap); /* in: heap where merge file structure + is allocated */ +/************************************************************************* +A thread which merge sorts the given file and inserts the sorted records into +the index. */ + +#ifndef __WIN__ +void * +#else +ulint +#endif +row_merge_sort_and_insert_thread( +/*=============================*/ + /* out: a dummy parameter */ + void* arg); /* in: parameters */ + + +/************************************************************************* +Remove an index from the system tables */ + +ulint +row_merge_remove_index( +/*===================*/ + /* out: error code or DB_SUCCESS */ + dict_index_t* index, /* in: index to be removed */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction handle */ + +/************************************************************************* +Print the definition of a table in the dictionary */ + +void +row_merge_print_table( +/*==================*/ + dict_table_t* table); /* in: table */ +/************************************************************************* +Mark all prebuilts using the table obsolete. These prebuilts are +rebuilt later. */ + +void +row_merge_mark_prebuilt_obsolete( +/*=============================*/ + + trx_t* trx, /* in: trx */ + dict_table_t* table); /* in: table */ +/************************************************************************* +Create a temporary table using a definition of the old table. You must +lock the data dictionary before calling this function. 
*/ + +dict_table_t* +row_merge_create_temporary_table( +/*=============================*/ + /* out: new temporary table + definition */ + const char* table_name, /* in: new table name */ + dict_table_t* table, /* in: old table definition */ + trx_t* trx, /* in: trx */ + ulint* error); /* in/out: error code or DB_SUCCESS */ +/************************************************************************* +Update all prebuilts for this table */ + +void +row_merge_prebuilts_update( +/*=======================*/ + + trx_t* trx, /* in: trx */ + dict_table_t* old_table); /* in: old table */ +/************************************************************************* +Rename the indexes in the dictionary. */ + +ulint +row_merge_rename_index( +/*===================*/ + /* out: DB_SUCCESS if all OK */ + trx_t* trx, /* in: Transaction */ + dict_table_t* table, /* in: Table for index */ + dict_index_t* index); /* in: Index to rename */ +/************************************************************************* +Create the index and load it into the dictionary. */ + +ulint +row_merge_create_index( +/*===================*/ + /* out: DB_SUCCESS if all OK */ + trx_t* trx, /* in: transaction */ + dict_index_t** index, /* out: the instance of the index */ + dict_table_t* table, /* in: the index is on this table */ + const merge_index_def_t* /* in: the index definition */ + index_def); +/************************************************************************* +Check if a transaction can use an index.*/ + +ibool +row_merge_is_index_usable( +/*======================*/ + /* out: TRUE if index can be used by + the transaction else FALSE*/ + const trx_t* trx, /* in: transaction */ + const dict_index_t* /* in: index to check */ + index); +/************************************************************************* +If there are views that refer to the old table name then we "attach" to +the new instance of the table else we drop it immediately.*/ + +ulint +row_merge_drop_table( +/*=================*/ + /* out: DB_SUCCESS if all OK else + error code.*/ + trx_t* trx, /* in: transaction */ + dict_table_t* table); /* in: table instance to drop */ +#endif /* row0merge_h */ + diff --git a/include/row0mysql.h b/include/row0mysql.h index 158ac7c72a3..155cac13ffc 100644 --- a/include/row0mysql.h +++ b/include/row0mysql.h @@ -21,7 +21,7 @@ Created 9/17/2000 Heikki Tuuri extern ibool row_rollback_on_timeout; -typedef struct row_prebuilt_struct row_prebuilt_t; +/* typedef struct row_prebuilt_struct row_prebuilt_t; */ /*********************************************************************** Frees the blob heap in prebuilt when no longer needed. */ @@ -153,6 +153,14 @@ row_update_prebuilt_trx( row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL handle */ trx_t* trx); /* in: transaction handle */ +/************************************************************************ +Update a prebuilt struct for a MySQL table handle. 
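For context, a minimal sketch (not from the patch) of how the prebuilts attached to a table might be walked, using the table->prebuilts base node and the ROW_PREBUILT_OBSOLETE magic number introduced elsewhere in this patch; treating magic_n as the obsolete flag is an assumption:

	row_prebuilt_t*	prebuilt;

	/* Flag every prebuilt of the table so that it is rebuilt
	before its next use. */
	for (prebuilt = UT_LIST_GET_FIRST(table->prebuilts);
	     prebuilt;
	     prebuilt = UT_LIST_GET_NEXT(prebuilts, prebuilt)) {

		prebuilt->magic_n = ROW_PREBUILT_OBSOLETE;
	}
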
*/ + +void +row_update_prebuilt( +/*================*/ + row_prebuilt_t* prebuilt, /* in: Innobase table handle */ + dict_table_t* table); /* in: table */ /************************************************************************* Unlocks an AUTO_INC type lock possibly reserved by trx. */ @@ -188,6 +196,16 @@ row_lock_table_for_mysql( prebuilt->select_lock_type */ ulint mode); /* in: lock mode of table (ignored if table==NULL) */ +/************************************************************************* +Sets a table lock on the table. */ + +int +row_lock_table_for_merge( +/*=====================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: lock table for this trx */ + dict_table_t* table, /* in: table to lock */ + ulint mode); /* in: lock mode of table */ /************************************************************************* Does an insert for MySQL. */ @@ -413,6 +431,19 @@ row_drop_table_for_mysql( ibool drop_db);/* in: TRUE=dropping whole database */ /************************************************************************* +Drops a table for MySQL. If the name of the dropped table ends to +characters INNODB_MONITOR, then this also stops printing of monitor +output by the master thread. But does not commit the transaction, this +is required for UNDOing dictionary records during recovery.*/ + +int +row_drop_table_for_mysql_no_commit( +/*===============================*/ + /* out: error code or DB_SUCCESS */ + const char* name, /* in: table name */ + trx_t* trx, /* in: transaction handle */ + ibool drop_db);/* in: TRUE=dropping whole database */ +/************************************************************************* Discards the tablespace of a table which stored in an .ibd file. Discarding means that this function deletes the .ibd file and assigns a new table id for the table. Also the flag table->ibd_file_missing is set TRUE. */ @@ -451,7 +482,8 @@ row_rename_table_for_mysql( /* out: error code or DB_SUCCESS */ const char* old_name, /* in: old table name */ const char* new_name, /* in: new table name */ - trx_t* trx); /* in: transaction handle */ + trx_t* trx, /* in: transaction handle */ + ibool commit); /* in: if TRUE then commit trx */ /************************************************************************* Checks a table for corruption. */ @@ -462,7 +494,93 @@ row_check_table_for_mysql( row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL handle */ #endif /* !UNIV_HOTBACKUP */ +/************************************************************************* +Build new indexes to a table by reading a cluster index, +creating a temporary file containing index entries, merge sorting +these index entries and inserting sorted index entries to indexes. */ +ulint +row_build_index_for_mysql( +/*====================*/ + /* out: 0 or error code */ + trx_t* trx, /* in: transaction */ + dict_table_t* old_table, /* in: Table where rows are + read from */ + dict_table_t* new_table, /* in: Table where indexes are + created. Note that old_table == + new_table if we are creating a + secondary keys. */ + dict_index_t** index, /* in: Indexes to be created */ + ibool new_primary, /* in: new primary key + i.e. 
clustered index will be built + for this table */ + ulint num_of_keys); /* in: Number of indexes to be + created */ +/************************************************************************* +Create a query graph for index creation */ + +ulint +row_create_index_graph_for_mysql( +/*=============================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: trx */ + dict_table_t* table, /* in: table */ + dict_index_t* index); /* in: index */ +/************************************************************************* +Remove those indexes which were created before an error happened in +the index build */ + +ulint +row_remove_indexes_for_mysql( +/*=========================*/ + /* out: 0 or error code */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: Table where index is created */ + dict_index_t** index, /* in: Indexes to be created */ + ulint num_created); /* in: Number of indexes created + before error and now must be removed */ +/*************************************************************************** +Writes information to an undo log about dictionary operation, create_table. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_create_table_dict_operation( +/*========================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + const char* table_name); /* in: table name created. */ +/*************************************************************************** +Writes information to an undo log about dictionary operation, create_index. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_create_index_dict_operation( +/*========================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + dict_index_t* index); /* in: index created. */ +/*************************************************************************** +Writes information to an undo log about dictionary operation, rename_table. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_rename_table_dict_operation( +/*========================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + const char* from_table_name,/* in: rename from table name. */ + const char* to_table_name, /* in: rename to table name. */ + const char* tmp_table_name);/* in: intermediate table name */ +/*************************************************************************** +Writes information to an undo log about dictionary operation, drop_table. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_drop_table_dict_operation( +/*======================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + const char* table_name); /* in: table name dropped */ /* A struct describing a place for an individual column in the MySQL row format which is presented to the table handler in ha_innobase. This template struct is used to speed up row transformations between @@ -512,16 +630,18 @@ struct mysql_row_templ_struct { #define ROW_PREBUILT_ALLOCATED 78540783 #define ROW_PREBUILT_FREED 26423527 +#define ROW_PREBUILT_OBSOLETE 12367541 /* A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; these are used to save CPU time. 
/* A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; these are used to save CPU time. */ struct row_prebuilt_struct { ulint magic_n; /* this magic number is set to - ROW_PREBUILT_ALLOCATED when created - and to ROW_PREBUILT_FREED when the - struct has been freed; used in - debugging */ + ROW_PREBUILT_ALLOCATED when created, + to ROW_PREBUILT_FREED when the + struct has been freed, or to + ROW_PREBUILT_OBSOLETE when the struct + needs a rebuild */ dict_table_t* table; /* Innobase table handle */ trx_t* trx; /* current transaction handle */ ibool sql_stat_start; /* TRUE when we start processing of @@ -668,10 +788,12 @@ struct row_prebuilt_struct { fetched row in fetch_cache */ ulint n_fetch_cached; /* number of not yet fetched rows in fetch_cache */ - mem_heap_t* blob_heap; /* in SELECTS BLOB fie lds are copied + mem_heap_t* blob_heap; /* in SELECTS BLOB fields are copied to this heap */ mem_heap_t* old_vers_heap; /* memory heap where a previous version is built in consistent read */ + UT_LIST_NODE_T(row_prebuilt_t) prebuilts; + /* list node of table->prebuilts */ ulint magic_n2; /* this should be the same as magic_n */ }; diff --git a/include/row0row.h b/include/row0row.h index 3a7c1050461..f4f9d11d7c3 100644 --- a/include/row0row.h +++ b/include/row0row.h @@ -52,15 +52,16 @@ row_get_rec_roll_ptr( dict_index_t* index, /* in: clustered index */ const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /********************************************************************* -When an insert to a table is performed, this function builds the entry which -has to be inserted to an index on the table. */ +When an insert or purge to a table is performed, this function builds +the entry to be inserted into or purged from an index on the table. */ dtuple_t* row_build_index_entry( /*==================*/ - /* out: index entry which should be inserted */ - const dtuple_t* row, /* in: row which should be inserted to the - table */ + /* out: index entry which should be + inserted or purged */ + const dtuple_t* row, /* in: row which should be + inserted or purged */ row_ext_t* ext, /* in: externally stored column prefixes, or NULL */ dict_index_t* index, /* in: index on the table */ diff --git a/include/row0types.h b/include/row0types.h index c0dfdd077db..94180c12790 100644 --- a/include/row0types.h +++ b/include/row0types.h @@ -36,4 +36,6 @@ typedef struct purge_node_struct purge_node_t; typedef struct row_ext_struct row_ext_t; +typedef struct row_prebuilt_struct row_prebuilt_t; + #endif diff --git a/include/row0uins.h b/include/row0uins.h index e28d5363048..9812f8470b4 100644 --- a/include/row0uins.h +++ b/include/row0uins.h @@ -28,6 +28,15 @@ row_undo_ins( /* out: DB_SUCCESS */ undo_node_t* node); /* in: row undo node */
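/* An illustrative sketch only of the expected use of the parsing
helper declared below; the caller shown here and what it does with the
parsed values are assumptions, not part of this patch. */
static
void
undo_ins_parse_sketch(
/*==================*/
	undo_node_t*	node)	/* in: row undo node */
{
	dulint	table_id;
	byte*	ptr;

	/* Read the record type and the table id of the insert undo
	record attached to the node; the returned pointer is positioned
	on the next field of the undo record. */
	ptr = row_undo_ins_parse_rec_type_and_table_id(node, &table_id);

	/* ... the remaining fields would be parsed starting from ptr ... */
	(void) ptr;
	(void) table_id;
}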
+/*************************************************************** +Parses the record type and the table id from an insert undo record. */ + +byte* +row_undo_ins_parse_rec_type_and_table_id( +/*=====================================*/ + /* out: ptr to next field to parse */ + undo_node_t* node, /* in: row undo node */ + dulint* table_id); /* out: table id */ #ifndef UNIV_NONINL #include "row0uins.ic" diff --git a/include/row0undo.h b/include/row0undo.h index 54dd1938a66..d6963d709bc 100644 --- a/include/row0undo.h +++ b/include/row0undo.h @@ -51,6 +51,24 @@ row_undo_step( /*==========*/ /* out: query thread to run next or NULL */ que_thr_t* thr); /* in: query thread */ +/*************************************************************** +Builds the dict undo list. */ + +ulint +row_undo_build_dict_undo_list( +/*==========================*/ + /* out: DB_SUCCESS or error code */ + undo_node_t* node); /* in: row undo node */ + +/*************************************************************** +Undoes or redoes a dictionary change. */ + +ulint +row_undo_dictionary( +/*================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: the transaction */ + dict_undo_t* dict_undo); /* in: dict op to undo */ /* A single query thread will try to perform the undo for all successive versions of a clustered index record, if the transaction has modified it @@ -78,6 +96,20 @@ struct undo_node_struct{ dulint undo_no;/* undo number of the record */ ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, ... */ + ulint rec_sub_type; /* undo log record subtype: + used when rec_type is + TRX_UNDO_DICTIONARY_REC, or 0 */ + char* new_table_name;/* table name in + TRX_UNDO_TABLE_CREATE_REC or + TRX_UNDO_TABLE_RENAME_REC or + TRX_UNDO_TABLE_DROP_REC or NULL */ + char* old_table_name;/* old table name in + TRX_UNDO_TABLE_RENAME_REC or NULL */ + char* tmp_table_name; /* intermediate table name used + during rename & drop operations in + ha_innobase::add_index(). */ + dulint index_id;/* index id in TRX_UNDO_INDEX_CREATE_REC + or ut_dulint_zero */ dulint new_roll_ptr; /* roll ptr to restore to clustered index record */ dulint new_trx_id; /* trx id to restore to clustered index diff --git a/include/trx0rec.h b/include/trx0rec.h index a97b79e766c..112eb6d5d92 100644 --- a/include/trx0rec.h +++ b/include/trx0rec.h @@ -60,6 +60,17 @@ trx_undo_rec_get_undo_no( /*=====================*/ /* out: undo no */ trx_undo_rec_t* undo_rec); /* in: undo log record */ +/************************************************************************** +Returns the start of the undo record data area. */ + +UNIV_INLINE +byte* +trx_undo_rec_get_ptr( +/*==================*/ + /* out: pointer to the undo record data area */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + dulint undo_no); /* in: undo no read from node */ + /************************************************************************** Reads from an undo log record the general parameters. */ @@ -201,6 +212,31 @@ trx_undo_report_row_operation( inserted undo log record, ut_dulint_zero if BTR_NO_UNDO_LOG flag was specified */
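/* An illustrative sketch only: one plausible way the
row_undo_report_drop_table_dict_operation() wrapper declared in
row0mysql.h could be expressed with trx_undo_report_dict_operation(),
declared just below.  The NULL arguments and the discarded roll_ptr are
assumptions, not part of this patch. */
ulint
row_undo_report_drop_table_dict_operation_sketch(
/*=============================================*/
				/* out: DB_SUCCESS or error code */
	trx_t*		trx,		/* in: transaction */
	const char*	table_name)	/* in: table name dropped */
{
	dulint	roll_ptr;

	return(trx_undo_report_dict_operation(
			TRX_UNDO_TABLE_DROP_OP, trx,
			NULL,		/* no index for a drop */
			table_name,	/* table being dropped */
			NULL,		/* no old table name */
			NULL,		/* no intermediate name */
			&roll_ptr));
}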
+/*************************************************************************** +Writes information to an undo log about a dictionary operation, e.g. +rename_table, create_table, create_index or drop table. This information +is used in a rollback of the transaction. */ + +ulint +trx_undo_report_dict_operation( +/*===========================*/ + /* out: DB_SUCCESS or error code */ + ulint op_type, /* in: TRX_UNDO_TABLE_CREATE_OP, + TRX_UNDO_TABLE_RENAME_OP, + TRX_UNDO_TABLE_DROP_OP, or + TRX_UNDO_INDEX_CREATE_OP */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: index to be created, + if op_type is + TRX_UNDO_INDEX_CREATE_OP */ + const char* table_name, /* in: table name or NULL, used in + create table, rename table and + drop table */ + const char* old_table_name, /* in: old table name or NULL, + used in rename table */ + const char* tmp_table_name, /* in: the intermediate name used */ + dulint* roll_ptr); /* out: rollback pointer to the + inserted undo log record */ /********************************************************************** Copies an undo record to heap. This function can be called if we know that the undo log record exists. */ @@ -279,24 +315,40 @@ trx_undo_parse_erase_page_end( /* Types of an undo log record: these have to be smaller than 16, as the compilation info multiplied by 16 is ORed to this value in an undo log record */ -#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */ -#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked + +#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */ +#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked record */ #define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to a not delete marked record; also the fields of the record can change */ -#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields +#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields do not change */ +#define TRX_UNDO_DICTIONARY_REC 15 /* dictionary operation, detailed + operation type can be found in the + undo log record subtype */ #define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by this and ORed to the type above */ -#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl +#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl to denote that we updated external storage fields: used by purge to free the external storage */ -/* Operation type flags used in trx_undo_report_row_operation */ -#define TRX_UNDO_INSERT_OP 1 -#define TRX_UNDO_MODIFY_OP 2 +/* Operation type flags used in trx_undo_report_row_operation +and trx_undo_report_dict_operation */ +#define TRX_UNDO_INSERT_OP 1 +#define TRX_UNDO_MODIFY_OP 2 +#define TRX_UNDO_INDEX_CREATE_OP 3 /* alter table add index */ +#define TRX_UNDO_TABLE_CREATE_OP 4 /* create table */ +#define TRX_UNDO_TABLE_RENAME_OP 5 /* rename table */ +#define TRX_UNDO_TABLE_DROP_OP 6 /* drop table */ + +/* Subtypes for dictionary operation */ +#define TRX_UNDO_NULL_REC 0 /* No subtype */ +#define TRX_UNDO_INDEX_CREATE_REC 1 /* index create record */ +#define TRX_UNDO_TABLE_CREATE_REC 2 /* table create record */ +#define TRX_UNDO_TABLE_RENAME_REC 3 /* table rename record */ +#define TRX_UNDO_TABLE_DROP_REC 4 /* table drop record */ #ifndef UNIV_NONINL #include "trx0rec.ic" diff --git a/include/trx0rec.ic b/include/trx0rec.ic index d1943da4e85..8ad0b514ebf 100644 --- a/include/trx0rec.ic +++ b/include/trx0rec.ic @@ -63,6 +63,20 @@ trx_undo_rec_get_undo_no( return(mach_dulint_read_much_compressed(ptr)); }
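/* An illustrative sketch only of how the accessor added below is meant
to be used together with trx_undo_rec_get_undo_no(); this wrapper is
not part of this patch. */
UNIV_INLINE
byte*
trx_undo_rec_data_sketch(
/*=====================*/
					/* out: pointer to the record
					data area */
	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
{
	dulint	undo_no;

	undo_no = trx_undo_rec_get_undo_no(undo_rec);

	/* Skip the record header and the compressed undo number to
	reach the start of the record data. */
	return(trx_undo_rec_get_ptr(undo_rec, undo_no));
}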
+/************************************************************************** +Returns the start of the undo record data area. */ +UNIV_INLINE +byte* +trx_undo_rec_get_ptr( +/*=================*/ + /* out: pointer to the undo record data area */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + dulint undo_no) /* in: undo no read from node */ +{ + return (((byte*) undo_rec) + 3 + + mach_dulint_get_much_compressed_size(undo_no)); +} + /*************************************************************************** Copies the undo record to the heap. */ UNIV_INLINE diff --git a/include/trx0trx.h b/include/trx0trx.h index 3cc537e9010..4824cd01bbb 100644 --- a/include/trx0trx.h +++ b/include/trx0trx.h @@ -450,8 +450,19 @@ struct trx_struct{ table. This is a hint that the table may need to be dropped in crash recovery. */ - dulint table_id; /* table id if the preceding field is - TRUE */ + dict_undo_list_t* + dict_undo_list; /* List of dictionary undo records + created during recovery. */ + dict_redo_list_t* + dict_redo_list; /* List of indexes created by this + transaction. */ + ulint (*sync_cb)(trx_t*, ibool); + /* Transaction synchronization + callback; if the ibool parameter is + TRUE the callback is invoked for + commit, else for rollback. */ + dulint table_id; /* Table to drop iff dict_operation + is TRUE. */ /*------------------------------*/ int active_trans; /* 1 - if a transaction in MySQL is active. 2 - if prepare_commit_mutex @@ -567,6 +578,9 @@ struct trx_struct{ void* error_info; /* if the error number indicates a duplicate key error, a pointer to the problematic index is stored here */ + ulint error_key_num; /* if the index creation fails due to + a duplicate key error, the MySQL key + number of that index is stored here */ sess_t* sess; /* session of the trx, NULL if none */ ulint que_state; /* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, ... */ diff --git a/lock/lock0lock.c b/lock/lock0lock.c index cf62d5c98d7..606018009c8 100644 --- a/lock/lock0lock.c +++ b/lock/lock0lock.c @@ -3678,7 +3678,9 @@ lock_table_enqueue_waiting( trx = thr_get_trx(thr); - if (trx->dict_operation) { + /* We have little choice here during index merge operations, and so + we suppress the printing of the message. */ + if (trx->dict_operation && *table->name != TEMP_TABLE_PREFIX) { ut_print_timestamp(stderr); fputs(" InnoDB: Error: a table lock wait happens" " in a dictionary operation!\n" diff --git a/mem/mem0mem.c b/mem/mem0mem.c index 4a01923a4a4..7b831356087 100644 --- a/mem/mem0mem.c +++ b/mem/mem0mem.c @@ -101,6 +101,21 @@ mem_alloc_func_noninline( return(mem_alloc_func(n, file_name, line)); } +/******************************************************************* +NOTE: Use the corresponding macro instead of this function. +Frees a single buffer of storage from +the dynamic memory of C compiler. Similar to free of C. */ + +void +mem_free_func_noninline( +/*====================*/ + void* ptr, /* in, own: buffer to be freed */ + const char* file_name, /* in: file name where freed */ + ulint line) /* in: line where freed */ +{ + mem_free_func(ptr, file_name, line); +} + /************************************************************************** Duplicates a NUL-terminated string, allocated from a memory heap. */ @@ -566,3 +581,60 @@ mem_validate_all_blocks(void) mem_pool_mutex_exit(); } #endif + +/******************************************************************* +Allocates n bytes of memory from a memory heap. 
*/ + +void* +mem_heap_alloc_noninline( +/*=====================*/ + /* out: allocated storage, NULL if did not + succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps) */ + mem_heap_t* heap, /* in: memory heap */ + ulint n) /* in: number of bytes; if the heap is allowed + to grow into the buffer pool, this must be + <= MEM_MAX_ALLOC_IN_BUF */ +{ + return (mem_heap_alloc(heap, n)); +} + +/********************************************************************* +NOTE: Use the corresponding macros instead of this function. Creates a +memory heap. For debugging purposes, takes also the file name and line as +argument. */ + +mem_heap_t* +mem_heap_create_func_noninline( +/*===========================*/ + /* out, own: memory heap, NULL if + did not succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps)*/ + ulint n, /* in: desired start block size, + this means that a single user buffer + of size n will fit in the block, + 0 creates a default size block; + if init_block is not NULL, n tells + its size in bytes */ + ulint type, /* in: heap type */ + const char* file_name, /* in: file name where created */ + ulint line) /* in: line where created */ +{ + return(mem_heap_create_func(n, type, file_name, line)); +} + +/********************************************************************* +NOTE: Use the corresponding macro instead of this function. Frees the space +occupied by a memory heap. In the debug version erases the heap memory +blocks. */ + +void +mem_heap_free_func_noninline( +/*=========================*/ + mem_heap_t* heap, /* in, own: heap to be freed */ + const char* file_name __attribute__((unused)), + /* in: file name where freed */ + ulint line __attribute__((unused))) +{ + mem_heap_free_func(heap, file_name, line); +} diff --git a/mysql-test/innodb-index.result b/mysql-test/innodb-index.result new file mode 100644 index 00000000000..dac7aea5d08 --- /dev/null +++ b/mysql-test/innodb-index.result @@ -0,0 +1,974 @@ +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak'); +commit; +alter table t1 add index b (b), add index b (b); +ERROR 42000: Duplicate key name 'b' +alter table t1 add index (b,b); +ERROR 42S21: Duplicate column name 'b' +alter table t1 add index d2 (d); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `d2` (`d`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL d2 23 NULL 4 +select * from t1 order by d; +a b c d +3 4 ad ad +2 3 ak ak +5 5 oo oo +4 4 tr tr +alter table t1 add unique index (b); +ERROR 23000: Duplicate entry '0' for key 'b' +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `d2` (`d`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table t1 add index (b); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `d2` (`d`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table t1 add unique index (c), add index (d); +show create table 
t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `c` (`c`), + KEY `d2` (`d`), + KEY `b` (`b`), + KEY `d` (`d`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL c 11 NULL 4 +select * from t1 order by c; +a b c d +3 4 ad ad +2 3 ak ak +5 5 oo oo +4 4 tr tr +alter table t1 drop index b, add index (b); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `c` (`c`), + KEY `d2` (`d`), + KEY `d` (`d`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +insert into t1 values(6,1,'ggg','ggg'); +select * from t1; +a b c d +2 3 ak ak +3 4 ad ad +4 4 tr tr +5 5 oo oo +6 1 ggg ggg +select * from t1 order by b; +a b c d +6 1 ggg ggg +2 3 ak ak +3 4 ad ad +4 4 tr tr +5 5 oo oo +select * from t1 order by c; +a b c d +3 4 ad ad +2 3 ak ak +6 1 ggg ggg +5 5 oo oo +4 4 tr tr +select * from t1 order by d; +a b c d +3 4 ad ad +2 3 ak ak +6 1 ggg ggg +5 5 oo oo +4 4 tr tr +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 5 NULL 5 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL c 11 NULL 5 +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL d2 23 NULL 5 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `c` (`c`), + KEY `d2` (`d`), + KEY `d` (`d`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ad','ad'),(4,4,'afe','afe'); +commit; +alter table t1 add index (c(2)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `c` (`c`(2)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table t1 add unique index (d(10)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `d` (`d`(10)), + KEY `c` (`c`(2)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +insert into t1 values(5,1,'ggg','ggg'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 3 ad ad +4 4 afe afe +5 1 ggg ggg +select * from t1 order by b; +a b c d +1 1 ab ab +5 1 ggg ggg +2 2 ac ac +3 3 ad ad +4 4 afe afe +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 3 ad ad +4 4 afe afe +5 1 ggg ggg +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 3 ad ad +4 4 afe afe +5 1 ggg ggg +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +explain select * from t1 order by c; +id select_type table type possible_keys key key_len 
ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `d` (`d`(10)), + KEY `c` (`c`(2)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table t1 drop index d; +insert into t1 values(8,9,'fff','fff'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 3 ad ad +4 4 afe afe +5 1 ggg ggg +8 9 fff fff +select * from t1 order by b; +a b c d +1 1 ab ab +5 1 ggg ggg +2 2 ac ac +3 3 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 3 ad ad +4 4 afe afe +8 9 fff fff +5 1 ggg ggg +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 3 ad ad +4 4 afe afe +8 9 fff fff +5 1 ggg ggg +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `c` (`c`(2)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); +commit; +alter table t1 add unique index (b,c); +insert into t1 values(8,9,'fff','fff'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 16 NULL 5 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `b` (`b`,`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table t1 add index (b,c); +insert into t1 values(11,11,'kkk','kkk'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff 
fff +11 11 kkk kkk +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 16 NULL 6 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `b` (`b`,`c`), + KEY `b_2` (`b`,`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table t1 add unique index (c,d); +insert into t1 values(13,13,'yyy','aaa'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +13 13 yyy aaa +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +13 13 yyy aaa +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +13 13 yyy aaa +select * from t1 order by d; +a b c d +13 13 yyy aaa +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +11 11 kkk kkk +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 16 NULL 7 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL c 34 NULL 7 +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 7 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `b` (`b`,`c`), + UNIQUE KEY `c` (`c`,`d`), + KEY `b_2` (`b`,`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, b int not null, c int, primary key (a), key (b)) engine = innodb; +create table t3(a int not null, c int not null, d int, primary key (a), key (c)) engine = innodb; +create table t4(a int not null, d int not null, e int, primary key (a), key (d)) engine = innodb; +create table t2(a int not null, b int not null, c int not null, d int not null, e int, +primary key (a), foreign key (b) references t1(b), foreign key (c) references t3(c), +foreign key (d) references t4(d)) engine = innodb; +alter table t1 drop index b; +ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint +alter table t3 drop index c; +ERROR HY000: Cannot drop index 'c': needed in a foreign key constraint +alter table t4 drop index d; +ERROR HY000: Cannot drop index 'd': needed in a foreign key constraint +alter table t2 drop index b; +ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint +alter table t2 drop index b, drop index c, drop index d; +ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint +set foreign_key_checks=0; +insert into t1 values (1,1,1); +insert into t3 values (1,1,1); +insert into t4 values (1,1,1); +insert into t2 values (1,1,1,1,1); +commit; +alter table t2 drop index b, add index (b); +show create table t2; +Table 
Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) NOT NULL, + `b` int(11) NOT NULL, + `c` int(11) NOT NULL, + `d` int(11) NOT NULL, + `e` int(11) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `c` (`c`), + KEY `d` (`d`), + KEY `b` (`b`), + CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`b`), + CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`c`) REFERENCES `t3` (`c`), + CONSTRAINT `t2_ibfk_3` FOREIGN KEY (`d`) REFERENCES `t4` (`d`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +set foreign_key_checks=1; +set foreign_key_checks=0; +drop table if exists t1,t2,t3,t4; +set foreign_key_checks=1; +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) +engine = innodb default charset=utf8; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); +commit; +alter table t1 add unique index (b); +ERROR 23000: Duplicate entry '0' for key 'b' +insert into t1 values(8,9,'fff','fff'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 +alter table t1 add index (b); +insert into t1 values(10,10,'kkk','iii'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 5 NULL 6 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 +alter table t1 add unique index (c), add index (d); +insert into t1 values(11,11,'aaa','mmm'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +11 11 aaa mmm +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 
afe afe +8 9 fff fff +10 10 kkk iii +11 11 aaa mmm +select * from t1 order by c; +a b c d +11 11 aaa mmm +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 kkk iii +11 11 aaa mmm +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 5 NULL 7 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL c 31 NULL 7 +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL d 63 NULL 7 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `c` (`c`), + KEY `b` (`b`), + KEY `d` (`d`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) +engine = innodb default charset=ucs2; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); +commit; +alter table t1 add unique index (b); +ERROR 23000: Duplicate entry '0' for key 'b' +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=ucs2 +alter table t1 add index (b); +insert into t1 values(8,9,'fff','fff'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by c; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 5 NULL 5 +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using filesort +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=ucs2 +alter table t1 add unique index (c), add index (d); +insert into t1 values(10,10,'aaa','kkk'); +select * from t1; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 aaa kkk +select * from t1 order by b; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 aaa kkk +select * from t1 order by c; +a b c d +10 10 aaa kkk +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +select * from t1 order by d; +a b c d +1 1 ab ab +2 2 ac ac +3 2 ad ad +4 4 afe afe +8 9 fff fff +10 10 aaa kkk +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 5 NULL 6 +explain select * from t1 order by c; +id 
select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL c 21 NULL 6 +explain select * from t1 order by d; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL d 43 NULL 6 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `c` (`c`), + KEY `b` (`b`), + KEY `d` (`d`) +) ENGINE=InnoDB DEFAULT CHARSET=ucs2 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1(a int not null, b int) engine = innodb; +insert into t1 values (1,1),(1,1),(1,1),(1,1); +alter table t1 add unique index (a); +ERROR 23000: Duplicate entry '0' for key 'a' +alter table t1 add unique index (b); +ERROR 23000: Duplicate entry '0' for key 'b' +alter table t1 add unique index (a), add unique index(b); +ERROR 23000: Duplicate entry '0' for key 'a' +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, c int not null,b int, primary key(a), unique key(c), key(b)) engine = innodb; +alter table t1 drop index c, drop index b; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `c` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, b int, primary key(a)) engine = innodb; +alter table t1 add index (b); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + PRIMARY KEY (`a`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ac','ac'),(4,4,'afe','afe'); +alter table t1 add unique index (b), add unique index (c), add unique index (d); +ERROR 23000: Duplicate entry '' for key 'c' +alter table t1 add unique index (b), add index (d), add unique index (c); +ERROR 23000: Duplicate entry '' for key 'c' +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `d` varchar(20) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t1; +create table t1(a int not null, b int not null, c int, primary key (a), key(c)) engine=innodb; +insert into t1 values (5,1,5),(4,2,4),(3,3,3),(2,4,2),(1,5,1); +alter table t1 add unique index (b); +insert into t1 values (10,20,20),(11,19,19),(12,18,18),(13,17,17); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) NOT NULL, + `c` int(11) DEFAULT NULL, + PRIMARY KEY (`a`), + UNIQUE KEY `b` (`b`), + KEY `c` (`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL c 5 NULL 9 +explain select * from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 9 +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index 
NULL b 4 NULL 9 +select * from t1 order by a; +a b c +1 5 1 +2 4 2 +3 3 3 +4 2 4 +5 1 5 +10 20 20 +11 19 19 +12 18 18 +13 17 17 +select * from t1 order by b; +a b c +5 1 5 +4 2 4 +3 3 3 +2 4 2 +1 5 1 +13 17 17 +12 18 18 +11 19 19 +10 20 20 +select * from t1 order by c; +a b c +1 5 1 +2 4 2 +3 3 3 +4 2 4 +5 1 5 +13 17 17 +12 18 18 +11 19 19 +10 20 20 +drop table t1; +create table t1(a int not null, b int not null) engine=innodb; +insert into t1 values (1,1); +alter table t1 add primary key(b); +insert into t1 values (2,2); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) NOT NULL, + PRIMARY KEY (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +select * from t1; +a b +1 1 +2 2 +explain select * from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 +explain select * from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 Using filesort +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 2 +checksum table t1; +Table Checksum +test.t1 582702641 +drop table t1; +create table t1(a int not null) engine=innodb; +insert into t1 values (1); +alter table t1 add primary key(a); +insert into t1 values (2); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +commit; +select * from t1; +a +1 +2 +explain select * from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 2 Using index +explain select * from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 2 Using index +checksum table t1; +Table Checksum +test.t1 1531596814 +drop table t1; +create table t1(a int, b blob,c text) engine=innodb default charset = utf8; +insert into t1 values (1,repeat('jejdkrun87',220),repeat('jejdkrun87',440)); +insert into t1 values (2,repeat('adfd72nh9k',440),repeat('adfd72nh9k',1100)); +checksum table t1; +Table Checksum +test.t1 1121933170 +alter table t1 add primary key (a), add key (b(20)); +checksum table t1; +Table Checksum +test.t1 335046842 +insert into t1 values (3,repeat('adfdpplkeock',440),repeat('adfdpplkeock',1100)); +insert into t1 values (4,repeat('adfdijnmnb78k',440),repeat('adfdijnmnb78k',1100)); +insert into t1 values (5,repeat('adfdijn0loKNHJik',440),repeat('adfdijn0loKNHJik',1100)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL DEFAULT '0', + `b` blob, + `c` text, + PRIMARY KEY (`a`), + KEY `b` (`b`(20)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +explain select * from t1 where b like 'adfd%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range b b 23 NULL 2 Using where +checksum table t1; +Table Checksum +test.t1 1008226368 +drop table t1; diff --git a/mysql-test/innodb-index.test b/mysql-test/innodb-index.test new file mode 100644 index 00000000000..0f6f5eb4803 --- /dev/null +++ b/mysql-test/innodb-index.test @@ -0,0 +1,282 @@ +-- source include/have_innodb.inc + +create table t1(a int not null, b int, c char(10), d 
varchar(20), primary key (a)) engine = innodb; +insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak'); +commit; +--error 1061 +alter table t1 add index b (b), add index b (b); +--error 1060 +alter table t1 add index (b,b); +alter table t1 add index d2 (d); +show create table t1; +explain select * from t1 order by d; +select * from t1 order by d; +--error 1582 +alter table t1 add unique index (b); +show create table t1; +alter table t1 add index (b); +show create table t1; +alter table t1 add unique index (c), add index (d); +show create table t1; +explain select * from t1 order by c; +select * from t1 order by c; +alter table t1 drop index b, add index (b); +show create table t1; +insert into t1 values(6,1,'ggg','ggg'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +drop table t1; + +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ad','ad'),(4,4,'afe','afe'); +commit; +alter table t1 add index (c(2)); +show create table t1; +alter table t1 add unique index (d(10)); +show create table t1; +insert into t1 values(5,1,'ggg','ggg'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +alter table t1 drop index d; +insert into t1 values(8,9,'fff','fff'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +drop table t1; + +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); +commit; +alter table t1 add unique index (b,c); +insert into t1 values(8,9,'fff','fff'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +alter table t1 add index (b,c); +insert into t1 values(11,11,'kkk','kkk'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +alter table t1 add unique index (c,d); +insert into t1 values(13,13,'yyy','aaa'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +drop table t1; + +create table t1(a int not null, b int not null, c int, primary key (a), key (b)) engine = innodb; +create table t3(a int not null, c int not null, d int, primary key (a), key (c)) engine = innodb; +create table t4(a int not null, d int not null, e int, primary key (a), key (d)) engine = innodb; +create table t2(a int not null, b int not null, c int not null, d int not null, e int, 
+primary key (a), foreign key (b) references t1(b), foreign key (c) references t3(c), +foreign key (d) references t4(d)) engine = innodb; +--error 1542 +alter table t1 drop index b; +--error 1542 +alter table t3 drop index c; +--error 1542 +alter table t4 drop index d; +--error 1542 +alter table t2 drop index b; +--error 1542 +alter table t2 drop index b, drop index c, drop index d; +set foreign_key_checks=0; +insert into t1 values (1,1,1); +insert into t3 values (1,1,1); +insert into t4 values (1,1,1); +insert into t2 values (1,1,1,1,1); +commit; +alter table t2 drop index b, add index (b); +show create table t2; +set foreign_key_checks=1; + +set foreign_key_checks=0; +--disable_warnings +drop table if exists t1,t2,t3,t4; +--enable_warnings +set foreign_key_checks=1; + +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) +engine = innodb default charset=utf8; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); +commit; +--error 1582 +alter table t1 add unique index (b); +insert into t1 values(8,9,'fff','fff'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +alter table t1 add index (b); +insert into t1 values(10,10,'kkk','iii'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +alter table t1 add unique index (c), add index (d); +insert into t1 values(11,11,'aaa','mmm'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +check table t1; +drop table t1; + +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) +engine = innodb default charset=ucs2; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); +commit; +--error 1582 +alter table t1 add unique index (b); +show create table t1; +alter table t1 add index (b); +insert into t1 values(8,9,'fff','fff'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +alter table t1 add unique index (c), add index (d); +insert into t1 values(10,10,'aaa','kkk'); +select * from t1; +select * from t1 order by b; +select * from t1 order by c; +select * from t1 order by d; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select * from t1 order by d; +show create table t1; +check table t1; +drop table t1; + +create table t1(a int not null, b int) engine = innodb; +insert into t1 values (1,1),(1,1),(1,1),(1,1); +--error 1582 +alter table t1 add unique index (a); +--error 1582 +alter table t1 add unique index (b); +--error 1582 +alter table t1 add unique index (a), add unique index(b); +show create table t1; +drop table t1; + +create table t1(a int not null, c int not null,b int, primary key(a), unique key(c), key(b)) engine = innodb; +alter table t1 drop index c, drop index b; +show create table t1; +drop table t1; + 
+create table t1(a int not null, b int, primary key(a)) engine = innodb; +alter table t1 add index (b); +show create table t1; +drop table t1; + +create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb; +insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ac','ac'),(4,4,'afe','afe'); +--error 1582 +alter table t1 add unique index (b), add unique index (c), add unique index (d); +--error 1582 +alter table t1 add unique index (b), add index (d), add unique index (c); +show create table t1; +drop table t1; + +create table t1(a int not null, b int not null, c int, primary key (a), key(c)) engine=innodb; +insert into t1 values (5,1,5),(4,2,4),(3,3,3),(2,4,2),(1,5,1); +alter table t1 add unique index (b); +insert into t1 values (10,20,20),(11,19,19),(12,18,18),(13,17,17); +show create table t1; +check table t1; +explain select * from t1 order by c; +explain select * from t1 order by a; +explain select * from t1 order by b; +select * from t1 order by a; +select * from t1 order by b; +select * from t1 order by c; +drop table t1; + +create table t1(a int not null, b int not null) engine=innodb; +insert into t1 values (1,1); +alter table t1 add primary key(b); +insert into t1 values (2,2); +show create table t1; +check table t1; +select * from t1; +explain select * from t1; +explain select * from t1 order by a; +explain select * from t1 order by b; +checksum table t1; +drop table t1; + +create table t1(a int not null) engine=innodb; +insert into t1 values (1); +alter table t1 add primary key(a); +insert into t1 values (2); +show create table t1; +check table t1; +commit; +select * from t1; +explain select * from t1; +explain select * from t1 order by a; +checksum table t1; +drop table t1; + +create table t1(a int, b blob,c text) engine=innodb default charset = utf8; +insert into t1 values (1,repeat('jejdkrun87',220),repeat('jejdkrun87',440)); +insert into t1 values (2,repeat('adfd72nh9k',440),repeat('adfd72nh9k',1100)); +checksum table t1; +alter table t1 add primary key (a), add key (b(20)); +checksum table t1; +insert into t1 values (3,repeat('adfdpplkeock',440),repeat('adfdpplkeock',1100)); +insert into t1 values (4,repeat('adfdijnmnb78k',440),repeat('adfdijnmnb78k',1100)); +insert into t1 values (5,repeat('adfdijn0loKNHJik',440),repeat('adfdijn0loKNHJik',1100)); +show create table t1; +check table t1; +explain select * from t1 where b like 'adfd%'; +checksum table t1; +drop table t1; diff --git a/mysql-test/innodb.test b/mysql-test/innodb.test index 75f2796abc6..3a200e41c08 100644 --- a/mysql-test/innodb.test +++ b/mysql-test/innodb.test @@ -1095,7 +1095,7 @@ show create table t2; create index id2 on t2 (id); show create table t2; drop index id2 on t2; ---error 1025,1025 +--error 1540,1540 drop index id on t2; show create table t2; drop table t2; diff --git a/os/os0file.c b/os/os0file.c index e0cbd3e6591..9be48efc886 100644 --- a/os/os0file.c +++ b/os/os0file.c @@ -497,17 +497,6 @@ os_io_init_simple(void) } } -#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__) -/************************************************************************* -Creates a temporary file that will be deleted on close. -This function is defined in ha_innodb.cc. */ - -int -innobase_mysql_tmpfile(void); -/*========================*/ - /* out: temporary file descriptor, or < 0 on error */ -#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */ - /*************************************************************************** Creates a temporary file. 
This function is like tmpfile(3), but the temporary file is created in the MySQL temporary directory. diff --git a/pars/pars0grm.c b/pars/pars0grm.c index 2e39b05bada..0d6706c43ab 100644 --- a/pars/pars0grm.c +++ b/pars/pars0grm.c @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 1.875d. */ +/* A Bison parser, made by GNU Bison 2.0. */ /* Skeleton parser for Yacc-like parsing with Bison, Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. @@ -292,7 +292,7 @@ typedef int YYSTYPE; /* Copy the second part of user declarations. */ -/* Line 214 of yacc.c. */ +/* Line 213 of yacc.c. */ #line 297 "pars0grm.tab.c" #if ! defined (yyoverflow) || YYERROR_VERBOSE @@ -308,14 +308,10 @@ typedef int YYSTYPE; # ifdef YYSTACK_USE_ALLOCA # if YYSTACK_USE_ALLOCA -# define YYSTACK_ALLOC alloca -# endif -# else -# if defined (alloca) || defined (_ALLOCA_H) -# define YYSTACK_ALLOC alloca -# else # ifdef __GNUC__ # define YYSTACK_ALLOC __builtin_alloca +# else +# define YYSTACK_ALLOC alloca # endif # endif # endif @@ -1059,20 +1055,53 @@ do \ } \ while (0) + #define YYTERROR 1 #define YYERRCODE 256 -/* YYLLOC_DEFAULT -- Compute the default location (before the actions - are run). */ +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). */ + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) #ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - ((Current).first_line = (Rhs)[1].first_line, \ - (Current).first_column = (Rhs)[1].first_column, \ - (Current).last_line = (Rhs)[N].last_line, \ - (Current).last_column = (Rhs)[N].last_column) +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (0) #endif + +/* YY_LOCATION_PRINT -- Print the location on the stream. + This macro was not mandated originally: define only if we know + we won't break user code: when these are the locations we know. */ + +#ifndef YY_LOCATION_PRINT +# if YYLTYPE_IS_TRIVIAL +# define YY_LOCATION_PRINT(File, Loc) \ + fprintf (File, "%d.%d-%d.%d", \ + (Loc).first_line, (Loc).first_column, \ + (Loc).last_line, (Loc).last_column) +# else +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif +#endif + + /* YYLEX -- calling `yylex' with the right arguments. 
*/ #ifdef YYLEX_PARAM @@ -1095,19 +1124,13 @@ do { \ YYFPRINTF Args; \ } while (0) -# define YYDSYMPRINT(Args) \ -do { \ - if (yydebug) \ - yysymprint Args; \ -} while (0) - -# define YYDSYMPRINTF(Title, Token, Value, Location) \ +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ do { \ if (yydebug) \ { \ YYFPRINTF (stderr, "%s ", Title); \ yysymprint (stderr, \ - Token, Value); \ + Type, Value); \ YYFPRINTF (stderr, "\n"); \ } \ } while (0) @@ -1174,8 +1197,7 @@ do { \ int yydebug; #else /* !YYDEBUG */ # define YYDPRINTF(Args) -# define YYDSYMPRINT(Args) -# define YYDSYMPRINTF(Title, Token, Value, Location) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) # define YY_STACK_PRINT(Bottom, Top) # define YY_REDUCE_PRINT(Rule) #endif /* !YYDEBUG */ @@ -1193,10 +1215,6 @@ int yydebug; SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH) evaluated with infinite-precision integer arithmetic. */ -#if defined (YYMAXDEPTH) && YYMAXDEPTH == 0 -# undef YYMAXDEPTH -#endif - #ifndef YYMAXDEPTH # define YYMAXDEPTH 10000 #endif @@ -1278,15 +1296,15 @@ yysymprint (yyoutput, yytype, yyvaluep) (void) yyvaluep; if (yytype < YYNTOKENS) - { - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); -# ifdef YYPRINT - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# endif - } + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); else YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif switch (yytype) { default: @@ -1302,10 +1320,11 @@ yysymprint (yyoutput, yytype, yyvaluep) #if defined (__STDC__) || defined (__cplusplus) static void -yydestruct (int yytype, YYSTYPE *yyvaluep) +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) #else static void -yydestruct (yytype, yyvaluep) +yydestruct (yymsg, yytype, yyvaluep) + const char *yymsg; int yytype; YYSTYPE *yyvaluep; #endif @@ -1313,6 +1332,10 @@ yydestruct (yytype, yyvaluep) /* Pacify ``unused variable'' warnings. */ (void) yyvaluep; + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + switch (yytype) { @@ -1340,10 +1363,10 @@ int yyparse (); -/* The lookahead symbol. */ +/* The look-ahead symbol. */ int yychar; -/* The semantic value of the lookahead symbol. */ +/* The semantic value of the look-ahead symbol. */ YYSTYPE yylval; /* Number of syntax errors so far. */ @@ -1379,7 +1402,7 @@ yyparse () int yyresult; /* Number of tokens to shift before error messages enabled. */ int yyerrstatus; - /* Lookahead token as an internal (translated) token number. */ + /* Look-ahead token as an internal (translated) token number. */ int yytoken = 0; /* Three stacks and their tools: @@ -1431,6 +1454,8 @@ yyparse () yyvsp = yyvs; + yyvsp[0] = yylval; + goto yysetstate; /*------------------------------------------------------------. @@ -1520,18 +1545,18 @@ yyparse () yybackup: /* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. */ +/* Read a look-ahead token if we need one and don't already have one. */ /* yyresume: */ - /* First try to decide what to do without reference to lookahead token. */ + /* First try to decide what to do without reference to look-ahead token. */ yyn = yypact[yystate]; if (yyn == YYPACT_NINF) goto yydefault; - /* Not known => get a lookahead token if don't already have one. */ + /* Not known => get a look-ahead token if don't already have one. */ - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. 
*/ + /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */ if (yychar == YYEMPTY) { YYDPRINTF ((stderr, "Reading a token: ")); @@ -1546,7 +1571,7 @@ yybackup: else { yytoken = YYTRANSLATE (yychar); - YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); } /* If the proper action on seeing token YYTOKEN is to reduce or to @@ -1566,8 +1591,8 @@ yybackup: if (yyn == YYFINAL) YYACCEPT; - /* Shift the lookahead token. */ - YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken])); + /* Shift the look-ahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); /* Discard the token being shifted unless it is eof. */ if (yychar != YYEOF) @@ -1618,277 +1643,277 @@ yyreduce: { case 25: #line 166 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 26: #line 168 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;} break; case 27: #line 172 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 28: #line 174 "pars0grm.y" - { yyval = pars_func(yyvsp[-3], yyvsp[-1]); ;} + { (yyval) = pars_func((yyvsp[-3]), (yyvsp[-1])); ;} break; case 29: #line 175 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 30: #line 176 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 31: #line 177 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 32: #line 178 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 33: #line 179 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 34: #line 180 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 35: #line 181 "pars0grm.y" - { yyval = yyvsp[0];;} + { (yyval) = (yyvsp[0]);;} break; case 36: #line 182 "pars0grm.y" - { yyval = pars_op('+', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('+', (yyvsp[-2]), (yyvsp[0])); ;} break; case 37: #line 183 "pars0grm.y" - { yyval = pars_op('-', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('-', (yyvsp[-2]), (yyvsp[0])); ;} break; case 38: #line 184 "pars0grm.y" - { yyval = pars_op('*', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('*', (yyvsp[-2]), (yyvsp[0])); ;} break; case 39: #line 185 "pars0grm.y" - { yyval = pars_op('/', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('/', (yyvsp[-2]), (yyvsp[0])); ;} break; case 40: #line 186 "pars0grm.y" - { yyval = pars_op('-', yyvsp[0], NULL); ;} + { (yyval) = pars_op('-', (yyvsp[0]), NULL); ;} break; case 41: #line 187 "pars0grm.y" - { yyval = yyvsp[-1]; ;} + { (yyval) = (yyvsp[-1]); ;} break; case 42: #line 188 "pars0grm.y" - { yyval = pars_op('=', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('=', (yyvsp[-2]), (yyvsp[0])); ;} break; case 43: #line 189 "pars0grm.y" - { yyval = pars_op('<', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('<', (yyvsp[-2]), (yyvsp[0])); ;} break; case 44: #line 190 "pars0grm.y" - { yyval = pars_op('>', yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op('>', (yyvsp[-2]), (yyvsp[0])); ;} break; case 45: #line 191 "pars0grm.y" - { yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;} break; case 46: #line 192 "pars0grm.y" - { yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;} break; case 47: #line 
193 "pars0grm.y" - { yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;} break; case 48: #line 194 "pars0grm.y" - { yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;} break; case 49: #line 195 "pars0grm.y" - { yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;} break; case 50: #line 196 "pars0grm.y" - { yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); ;} + { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[0]), NULL); ;} break; case 51: #line 198 "pars0grm.y" - { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;} + { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;} break; case 52: #line 200 "pars0grm.y" - { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;} + { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;} break; case 53: #line 204 "pars0grm.y" - { yyval = &pars_to_char_token; ;} + { (yyval) = &pars_to_char_token; ;} break; case 54: #line 205 "pars0grm.y" - { yyval = &pars_to_number_token; ;} + { (yyval) = &pars_to_number_token; ;} break; case 55: #line 206 "pars0grm.y" - { yyval = &pars_to_binary_token; ;} + { (yyval) = &pars_to_binary_token; ;} break; case 56: #line 208 "pars0grm.y" - { yyval = &pars_binary_to_number_token; ;} + { (yyval) = &pars_binary_to_number_token; ;} break; case 57: #line 209 "pars0grm.y" - { yyval = &pars_substr_token; ;} + { (yyval) = &pars_substr_token; ;} break; case 58: #line 210 "pars0grm.y" - { yyval = &pars_concat_token; ;} + { (yyval) = &pars_concat_token; ;} break; case 59: #line 211 "pars0grm.y" - { yyval = &pars_instr_token; ;} + { (yyval) = &pars_instr_token; ;} break; case 60: #line 212 "pars0grm.y" - { yyval = &pars_length_token; ;} + { (yyval) = &pars_length_token; ;} break; case 61: #line 213 "pars0grm.y" - { yyval = &pars_sysdate_token; ;} + { (yyval) = &pars_sysdate_token; ;} break; case 62: #line 214 "pars0grm.y" - { yyval = &pars_rnd_token; ;} + { (yyval) = &pars_rnd_token; ;} break; case 63: #line 215 "pars0grm.y" - { yyval = &pars_rnd_str_token; ;} + { (yyval) = &pars_rnd_str_token; ;} break; case 67: #line 226 "pars0grm.y" - { yyval = pars_stored_procedure_call(yyvsp[-4]); ;} + { (yyval) = pars_stored_procedure_call((yyvsp[-4])); ;} break; case 68: #line 231 "pars0grm.y" - { yyval = pars_procedure_call(yyvsp[-3], yyvsp[-1]); ;} + { (yyval) = pars_procedure_call((yyvsp[-3]), (yyvsp[-1])); ;} break; case 69: #line 235 "pars0grm.y" - { yyval = &pars_replstr_token; ;} + { (yyval) = &pars_replstr_token; ;} break; case 70: #line 236 "pars0grm.y" - { yyval = &pars_printf_token; ;} + { (yyval) = &pars_printf_token; ;} break; case 71: #line 237 "pars0grm.y" - { yyval = &pars_assert_token; ;} + { (yyval) = &pars_assert_token; ;} break; case 72: #line 241 "pars0grm.y" - { yyval = yyvsp[-2]; ;} + { (yyval) = (yyvsp[-2]); ;} break; case 73: #line 245 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 74: #line 247 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 75: #line 251 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 76: #line 252 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 77: #line 254 "pars0grm.y" - { 
yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 78: #line 258 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 79: #line 259 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]);;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0]));;} break; case 80: #line 260 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 81: #line 264 "pars0grm.y" - { yyval = yyvsp[0]; ;} + { (yyval) = (yyvsp[0]); ;} break; case 82: #line 266 "pars0grm.y" - { yyval = pars_func(&pars_count_token, + { (yyval) = pars_func(&pars_count_token, que_node_list_add_last(NULL, sym_tab_add_int_lit( pars_sym_tab_global, 1))); ;} @@ -1896,74 +1921,74 @@ yyreduce: case 83: #line 271 "pars0grm.y" - { yyval = pars_func(&pars_count_token, + { (yyval) = pars_func(&pars_count_token, que_node_list_add_last(NULL, pars_func(&pars_distinct_token, que_node_list_add_last( - NULL, yyvsp[-1])))); ;} + NULL, (yyvsp[-1]))))); ;} break; case 84: #line 277 "pars0grm.y" - { yyval = pars_func(&pars_sum_token, + { (yyval) = pars_func(&pars_sum_token, que_node_list_add_last(NULL, - yyvsp[-1])); ;} + (yyvsp[-1]))); ;} break; case 85: #line 283 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 86: #line 284 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 87: #line 286 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 88: #line 290 "pars0grm.y" - { yyval = pars_select_list(&pars_star_denoter, + { (yyval) = pars_select_list(&pars_star_denoter, NULL); ;} break; case 89: #line 293 "pars0grm.y" - { yyval = pars_select_list(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_select_list((yyvsp[-2]), (yyvsp[0])); ;} break; case 90: #line 294 "pars0grm.y" - { yyval = pars_select_list(yyvsp[0], NULL); ;} + { (yyval) = pars_select_list((yyvsp[0]), NULL); ;} break; case 91: #line 298 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 92: #line 299 "pars0grm.y" - { yyval = yyvsp[0]; ;} + { (yyval) = (yyvsp[0]); ;} break; case 93: #line 303 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 94: #line 305 "pars0grm.y" - { yyval = &pars_update_token; ;} + { (yyval) = &pars_update_token; ;} break; case 95: #line 309 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 96: @@ -1973,368 +1998,368 @@ yyreduce: case 97: #line 315 "pars0grm.y" - { yyval = &pars_asc_token; ;} + { (yyval) = &pars_asc_token; ;} break; case 98: #line 316 "pars0grm.y" - { yyval = &pars_asc_token; ;} + { (yyval) = &pars_asc_token; ;} break; case 99: #line 317 "pars0grm.y" - { yyval = &pars_desc_token; ;} + { (yyval) = &pars_desc_token; ;} break; case 100: #line 321 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 101: #line 323 "pars0grm.y" - { yyval = pars_order_by(yyvsp[-1], yyvsp[0]); ;} + { (yyval) = pars_order_by((yyvsp[-1]), (yyvsp[0])); ;} break; case 102: #line 332 "pars0grm.y" - { yyval = pars_select_statement(yyvsp[-6], yyvsp[-4], yyvsp[-3], - yyvsp[-2], yyvsp[-1], yyvsp[0]); ;} + { (yyval) = pars_select_statement((yyvsp[-6]), (yyvsp[-4]), (yyvsp[-3]), + (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;} break; case 103: #line 338 "pars0grm.y" - { yyval = yyvsp[0]; ;} + { (yyval) = (yyvsp[0]); ;} break; case 
104: #line 343 "pars0grm.y" - { yyval = pars_insert_statement(yyvsp[-4], yyvsp[-1], NULL); ;} + { (yyval) = pars_insert_statement((yyvsp[-4]), (yyvsp[-1]), NULL); ;} break; case 105: #line 345 "pars0grm.y" - { yyval = pars_insert_statement(yyvsp[-1], NULL, yyvsp[0]); ;} + { (yyval) = pars_insert_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;} break; case 106: #line 349 "pars0grm.y" - { yyval = pars_column_assignment(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_column_assignment((yyvsp[-2]), (yyvsp[0])); ;} break; case 107: #line 353 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 108: #line 355 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 109: #line 361 "pars0grm.y" - { yyval = yyvsp[0]; ;} + { (yyval) = (yyvsp[0]); ;} break; case 110: #line 367 "pars0grm.y" - { yyval = pars_update_statement_start(FALSE, - yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_update_statement_start(FALSE, + (yyvsp[-2]), (yyvsp[0])); ;} break; case 111: #line 373 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;} + { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;} break; case 112: #line 378 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;} + { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;} break; case 113: #line 383 "pars0grm.y" - { yyval = pars_update_statement_start(TRUE, - yyvsp[0], NULL); ;} + { (yyval) = pars_update_statement_start(TRUE, + (yyvsp[0]), NULL); ;} break; case 114: #line 389 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;} + { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;} break; case 115: #line 394 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;} + { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;} break; case 116: #line 399 "pars0grm.y" - { yyval = pars_row_printf_statement(yyvsp[0]); ;} + { (yyval) = pars_row_printf_statement((yyvsp[0])); ;} break; case 117: #line 404 "pars0grm.y" - { yyval = pars_assignment_statement(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_assignment_statement((yyvsp[-2]), (yyvsp[0])); ;} break; case 118: #line 410 "pars0grm.y" - { yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_elsif_element((yyvsp[-2]), (yyvsp[0])); ;} break; case 119: #line 414 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 120: #line 416 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;} break; case 121: #line 420 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 122: #line 422 "pars0grm.y" - { yyval = yyvsp[0]; ;} + { (yyval) = (yyvsp[0]); ;} break; case 123: #line 423 "pars0grm.y" - { yyval = yyvsp[0]; ;} + { (yyval) = (yyvsp[0]); ;} break; case 124: #line 430 "pars0grm.y" - { yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); ;} + { (yyval) = pars_if_statement((yyvsp[-5]), (yyvsp[-3]), (yyvsp[-2])); ;} break; case 125: #line 436 "pars0grm.y" - { yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); ;} + { (yyval) = pars_while_statement((yyvsp[-4]), (yyvsp[-2])); ;} break; case 126: #line 444 "pars0grm.y" - { yyval = pars_for_statement(yyvsp[-8], yyvsp[-6], yyvsp[-4], yyvsp[-2]); ;} + { (yyval) = 
pars_for_statement((yyvsp[-8]), (yyvsp[-6]), (yyvsp[-4]), (yyvsp[-2])); ;} break; case 127: #line 448 "pars0grm.y" - { yyval = pars_exit_statement(); ;} + { (yyval) = pars_exit_statement(); ;} break; case 128: #line 452 "pars0grm.y" - { yyval = pars_return_statement(); ;} + { (yyval) = pars_return_statement(); ;} break; case 129: #line 457 "pars0grm.y" - { yyval = pars_open_statement( - ROW_SEL_OPEN_CURSOR, yyvsp[0]); ;} + { (yyval) = pars_open_statement( + ROW_SEL_OPEN_CURSOR, (yyvsp[0])); ;} break; case 130: #line 463 "pars0grm.y" - { yyval = pars_open_statement( - ROW_SEL_CLOSE_CURSOR, yyvsp[0]); ;} + { (yyval) = pars_open_statement( + ROW_SEL_CLOSE_CURSOR, (yyvsp[0])); ;} break; case 131: #line 469 "pars0grm.y" - { yyval = pars_fetch_statement(yyvsp[-2], yyvsp[0], NULL); ;} + { (yyval) = pars_fetch_statement((yyvsp[-2]), (yyvsp[0]), NULL); ;} break; case 132: #line 471 "pars0grm.y" - { yyval = pars_fetch_statement(yyvsp[-2], NULL, yyvsp[0]); ;} + { (yyval) = pars_fetch_statement((yyvsp[-2]), NULL, (yyvsp[0])); ;} break; case 133: #line 476 "pars0grm.y" - { yyval = pars_column_def(yyvsp[-4], yyvsp[-3], yyvsp[-2], yyvsp[-1], yyvsp[0]); ;} + { (yyval) = pars_column_def((yyvsp[-4]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;} break; case 134: #line 480 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 135: #line 482 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 136: #line 486 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 137: #line 488 "pars0grm.y" - { yyval = yyvsp[-1]; ;} + { (yyval) = (yyvsp[-1]); ;} break; case 138: #line 492 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 139: #line 494 "pars0grm.y" - { yyval = &pars_int_token; + { (yyval) = &pars_int_token; /* pass any non-NULL pointer */ ;} break; case 140: #line 499 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 141: #line 501 "pars0grm.y" - { yyval = &pars_int_token; + { (yyval) = &pars_int_token; /* pass any non-NULL pointer */ ;} break; case 142: #line 506 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 143: #line 508 "pars0grm.y" - { yyval = &pars_int_token; + { (yyval) = &pars_int_token; /* pass any non-NULL pointer */ ;} break; case 144: #line 515 "pars0grm.y" - { yyval = pars_create_table(yyvsp[-4], yyvsp[-2], yyvsp[0]); ;} + { (yyval) = pars_create_table((yyvsp[-4]), (yyvsp[-2]), (yyvsp[0])); ;} break; case 145: #line 519 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 146: #line 521 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 147: #line 525 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 148: #line 526 "pars0grm.y" - { yyval = &pars_unique_token; ;} + { (yyval) = &pars_unique_token; ;} break; case 149: #line 530 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 150: #line 531 "pars0grm.y" - { yyval = &pars_clustered_token; ;} + { (yyval) = &pars_clustered_token; ;} break; case 151: #line 539 "pars0grm.y" - { yyval = pars_create_index(yyvsp[-8], yyvsp[-7], yyvsp[-5], yyvsp[-3], yyvsp[-1]); ;} + { (yyval) = pars_create_index((yyvsp[-8]), (yyvsp[-7]), (yyvsp[-5]), (yyvsp[-3]), (yyvsp[-1])); ;} break; case 
152: #line 544 "pars0grm.y" - { yyval = pars_commit_statement(); ;} + { (yyval) = pars_commit_statement(); ;} break; case 153: #line 549 "pars0grm.y" - { yyval = pars_rollback_statement(); ;} + { (yyval) = pars_rollback_statement(); ;} break; case 154: #line 553 "pars0grm.y" - { yyval = &pars_int_token; ;} + { (yyval) = &pars_int_token; ;} break; case 155: #line 554 "pars0grm.y" - { yyval = &pars_int_token; ;} + { (yyval) = &pars_int_token; ;} break; case 156: #line 555 "pars0grm.y" - { yyval = &pars_char_token; ;} + { (yyval) = &pars_char_token; ;} break; case 157: #line 556 "pars0grm.y" - { yyval = &pars_binary_token; ;} + { (yyval) = &pars_binary_token; ;} break; case 158: #line 557 "pars0grm.y" - { yyval = &pars_blob_token; ;} + { (yyval) = &pars_blob_token; ;} break; case 159: #line 562 "pars0grm.y" - { yyval = pars_parameter_declaration(yyvsp[-2], - PARS_INPUT, yyvsp[0]); ;} + { (yyval) = pars_parameter_declaration((yyvsp[-2]), + PARS_INPUT, (yyvsp[0])); ;} break; case 160: #line 565 "pars0grm.y" - { yyval = pars_parameter_declaration(yyvsp[-2], - PARS_OUTPUT, yyvsp[0]); ;} + { (yyval) = pars_parameter_declaration((yyvsp[-2]), + PARS_OUTPUT, (yyvsp[0])); ;} break; case 161: #line 570 "pars0grm.y" - { yyval = NULL; ;} + { (yyval) = NULL; ;} break; case 162: #line 571 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} + { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;} break; case 163: #line 573 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} + { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;} break; case 164: #line 578 "pars0grm.y" - { yyval = pars_variable_declaration(yyvsp[-2], yyvsp[-1]); ;} + { (yyval) = pars_variable_declaration((yyvsp[-2]), (yyvsp[-1])); ;} break; case 168: #line 590 "pars0grm.y" - { yyval = pars_cursor_declaration(yyvsp[-3], yyvsp[-1]); ;} + { (yyval) = pars_cursor_declaration((yyvsp[-3]), (yyvsp[-1])); ;} break; case 169: #line 595 "pars0grm.y" - { yyval = pars_function_declaration(yyvsp[-1]); ;} + { (yyval) = pars_function_declaration((yyvsp[-1])); ;} break; case 175: #line 616 "pars0grm.y" - { yyval = pars_procedure_definition(yyvsp[-9], yyvsp[-7], - yyvsp[-1]); ;} + { (yyval) = pars_procedure_definition((yyvsp[-9]), (yyvsp[-7]), + (yyvsp[-1])); ;} break; @@ -2441,7 +2466,7 @@ yyerrlab: if (yyerrstatus == 3) { - /* If just tried and failed to reuse lookahead token after an + /* If just tried and failed to reuse look-ahead token after an error, discard it. */ if (yychar <= YYEOF) @@ -2451,23 +2476,22 @@ yyerrlab: if (yychar == YYEOF) for (;;) { + YYPOPSTACK; if (yyssp == yyss) YYABORT; - YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); - yydestruct (yystos[*yyssp], yyvsp); + yydestruct ("Error: popping", + yystos[*yyssp], yyvsp); } } else { - YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc); - yydestruct (yytoken, &yylval); + yydestruct ("Error: discarding", yytoken, &yylval); yychar = YYEMPTY; - } } - /* Else will try to reuse lookahead token after shifting the error + /* Else will try to reuse look-ahead token after shifting the error token. 
*/ goto yyerrlab1; @@ -2484,7 +2508,7 @@ yyerrorlab: goto yyerrorlab; #endif - yyvsp -= yylen; +yyvsp -= yylen; yyssp -= yylen; yystate = *yyssp; goto yyerrlab1; @@ -2514,8 +2538,8 @@ yyerrlab1: if (yyssp == yyss) YYABORT; - YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); - yydestruct (yystos[yystate], yyvsp); + + yydestruct ("Error: popping", yystos[yystate], yyvsp); YYPOPSTACK; yystate = *yyssp; YY_STACK_PRINT (yyss, yyssp); @@ -2524,11 +2548,12 @@ yyerrlab1: if (yyn == YYFINAL) YYACCEPT; - YYDPRINTF ((stderr, "Shifting error token, ")); - *++yyvsp = yylval; + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + yystate = yyn; goto yynewstate; @@ -2544,6 +2569,9 @@ yyacceptlab: | yyabortlab -- YYABORT comes here. | `-----------------------------------*/ yyabortlab: + yydestruct ("Error: discarding lookahead", + yytoken, &yylval); + yychar = YYEMPTY; yyresult = 1; goto yyreturn; diff --git a/pars/pars0grm.h b/pars/pars0grm.h index 0062b8314ee..ea6c4c5f896 100644 --- a/pars/pars0grm.h +++ b/pars/pars0grm.h @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 1.875d. */ +/* A Bison parser, made by GNU Bison 2.0. */ /* Skeleton parser for Yacc-like parsing with Bison, Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. diff --git a/que/que0que.c b/que/que0que.c index bf83f28f04e..d6026c657ba 100644 --- a/que/que0que.c +++ b/que/que0que.c @@ -336,6 +336,9 @@ que_fork_start_command( fork->last_sel_node = NULL; + suspended_thr = NULL; + completed_thr = NULL; + /* Choose the query thread to run: usually there is just one thread, but in a parallelized select, which necessarily is non-scrollable, there may be several to choose from */ diff --git a/row/Makefile.am b/row/Makefile.am index bb551d0ffee..b6c5ef9a66b 100644 --- a/row/Makefile.am +++ b/row/Makefile.am @@ -17,7 +17,7 @@ include ../include/Makefile.i noinst_LIBRARIES = librow.a -librow_a_SOURCES = row0ext.c\ +librow_a_SOURCES = row0ext.c row0merge.c\ row0ins.c row0mysql.c row0purge.c row0row.c row0sel.c\ row0uins.c row0umod.c row0undo.c row0upd.c row0vers.c diff --git a/row/row0ins.c b/row/row0ins.c index 1a39ea0da06..f98d6a8f53c 100644 --- a/row/row0ins.c +++ b/row/row0ins.c @@ -103,7 +103,7 @@ ins_node_create( /*************************************************************** Creates an entry template for each index of a table. */ -static + void ins_node_create_entry_list( /*=======================*/ diff --git a/row/row0merge.c b/row/row0merge.c new file mode 100644 index 00000000000..187a486ca2b --- /dev/null +++ b/row/row0merge.c @@ -0,0 +1,2231 @@ +/****************************************************** +New index creation routines using a merge sort + +(c) 2005 Innobase Oy + +Created 12/4/2005 Jan Lindstrom +*******************************************************/ + +/****************************************************** +TODO: + +1. Run test with purify and valgrind and fix possible + errors found. + +2. Add more test cases and fix bugs founds. + +3. If we are using variable length keys, then in + some cases these keys do not fit into two empty blocks + in a different order. Therefore, some empty space is + left in every block. However, it has not been shown + that this empty space is enough for all cases. Therefore, + in the above case these overloaded records should be put + on another block. + +4. Run benchmarks. 
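+
+In outline, the routines below fit together as follows:
+row_merge_read_clustered_index() scans the clustered index once and
+builds the index entries for every index to be created; the entries of
+each index are collected into an in-memory linked list, and when a list
+grows to at least MERGE_BLOCK_SIZE bytes it is passed to
+row_merge_sort_and_store(), which sorts it with
+row_merge_sort_linked_list() and writes as many of the sorted records
+as fit into a fixed-size block chained behind the previously written
+blocks on disk. The resulting on-disk list of sorted blocks is then
+merge sorted by row_merge_sort_linked_list_in_disk().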
+*******************************************************/ + +#include "row0merge.h" +#include "row0ext.h" +#include "row0row.h" +#include "row0upd.h" +#include "row0ins.h" +#include "row0sel.h" +#include "dict0dict.h" +#include "dict0mem.h" +#include "dict0boot.h" +#include "dict0crea.h" +#include "dict0load.h" +#include "btr0btr.h" +#include "mach0data.h" +#include "trx0rseg.h" +#include "trx0trx.h" +#include "trx0roll.h" +#include "trx0undo.h" +#include "trx0purge.h" +#include "trx0rec.h" +#include "que0que.h" +#include "rem0cmp.h" +#include "read0read.h" +#include "os0file.h" +#include "lock0lock.h" +#include "data0data.h" +#include "data0type.h" +#include "que0que.h" +#include "pars0pars.h" +#include "mem0mem.h" +#include "log0log.h" + +static +dict_index_t* +row_merge_dict_table_get_index( +/*===========================*/ + dict_table_t* table, + const merge_index_def_t* + index_def) +{ + ulint i; + dict_index_t* index; + const char** column_names; + + column_names = (const char**) mem_alloc_noninline( + index_def->n_fields * sizeof(char*)); + + for (i = 0; i < index_def->n_fields; ++i) { + column_names[i] = index_def->fields[i]->field_name; + } + + index = dict_table_get_index_by_max_id( + table, index_def->name, column_names, index_def->n_fields); + + mem_free_noninline(column_names); + + return(index); +} + +/************************************************************************ +Creates and initializes a merge block */ +static +merge_block_t* +row_merge_block_create(void) +/*========================*/ + /* out: pointer to block */ +{ + merge_block_t* mblock; + + mblock = (merge_block_t*)mem_alloc(MERGE_BLOCK_SIZE); + + mblock->header.n_records = 0; + mblock->header.offset = ut_dulint_create(0, 0); + mblock->header.next = ut_dulint_create(0, 0); + + return(mblock); +} + +/************************************************************************ +Read a merge block from the disk */ +static +void +row_merge_block_read( +/*=================*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in/out: buffer where to read */ + dulint offset) /* in: offset where to read */ +{ + ut_ad(buf); + + os_file_read(file, buf, ut_dulint_get_low(offset), + ut_dulint_get_high(offset), MERGE_BLOCK_SIZE); +} + +/************************************************************************ +Read a merge block header from the disk */ +static +void +row_merge_block_header_read( +/*========================*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + merge_block_header_t* header, /* in/out: buffer where to read */ + dulint offset) /* in: offset where to read */ +{ + ut_ad(header); + + os_file_read(file, header, ut_dulint_get_low(offset), + ut_dulint_get_high(offset), + sizeof(merge_block_header_t)); +} + +/************************************************************************ +Write a merge block header to the disk */ +static +void +row_merge_block_header_write( +/*=========================*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + merge_block_header_t* header, /* in/out: buffer where to read */ + dulint offset) /* in: offset where to read */ +{ + ut_ad(header); + + os_file_write("(merge)", file, header, ut_dulint_get_low(offset), + ut_dulint_get_high(offset), sizeof(merge_block_header_t)); +} + +/************************************************************************ +Write a merge block to the disk */ 
+static +void +row_merge_block_write( +/*==================*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where write from */ + dulint offset) /* in: offset where write to */ +{ + ut_ad(buf); + + os_file_write("(merge)", file, buf, ut_dulint_get_low(offset), + ut_dulint_get_high(offset), MERGE_BLOCK_SIZE); +} + +/************************************************************** +Create a merge record and copy a index data tuple to the merge +record */ +static +merge_rec_t* +row_merge_rec_create( +/*=================*/ + /* out: merge record */ + const dtuple_t* dtuple, /* in: data tuple */ + const ulint* ext, /* in: array of extern field numbers */ + ulint n_ext, /* in: number of elements in ext */ + dict_index_t* index, /* in: index record descriptor */ + mem_heap_t* heap) /* in: heap where memory is allocated */ +{ + merge_rec_t* m_rec; + ulint rec_size; + byte* buf; + + ut_ad(dtuple && index && heap); + ut_ad(dtuple_validate(dtuple)); + + m_rec = (merge_rec_t*) mem_heap_alloc(heap, sizeof(merge_rec_t)); + + rec_size = rec_get_converted_size(index, dtuple, ext, n_ext); + buf = mem_heap_alloc(heap, rec_size); + + m_rec->rec = rec_convert_dtuple_to_rec(buf, index, dtuple, + ext, n_ext); + m_rec->next = NULL; + + return(m_rec); +} + +/************************************************************************ +Checks that a record fits to a block */ +static +ibool +row_merge_rec_fits_to_block( +/*========================*/ + /* out: TRUE if record fits to merge block, + FALSE if record does not fit to block */ + ulint* offsets,/* in: record offsets */ + ulint offset) /* in: offset where to store in the block */ +{ + ulint rec_len; + + ut_ad(offsets); + + rec_len = mach_get_compressed_size(rec_offs_extra_size(offsets)) + + rec_offs_size(offsets); + + /* Note that we intentionally leave free space on + every block. This free space might be later needed when two + blocks are merged and variable length keys are used. Variable + length keys on two blocks might be interleaved on such a manner + that they do not fit on two blocks if blocks are too full */ + + return((offset + rec_len) < (MERGE_BLOCK_SIZE + - MERGE_BLOCK_SAFETY_MARGIN + - sizeof(merge_block_header_t))); +} + +/************************************************************************ +Store a record to a merge file block. Note that this function does +not check that the record fits to the block. */ +static +ulint +row_merge_store_rec_to_block( +/*=========================*/ + /* out: offset for next data tuple */ + rec_t* rec, /* in: record to be stored in the memory */ + ulint* offsets,/* in: record offsets */ + merge_block_t* mblock, /* in: block where data tuple is stored */ + ulint offset) /* in: offset where to store */ +{ + char* dest_data; + ulint rec_len; + ulint extra_len; + ulint storage_size; + + ut_ad(rec && mblock && offsets); + ut_ad(rec_validate(rec, offsets)); + + /* Find the position in the block where this data tuple is stored. + If we are at the start of the block, remember to add size of header + to the offset */ + + if (offset == 0) { + dest_data = mblock->data; + } else { + dest_data = ((char *)mblock + offset); + } + + ut_ad(dest_data < ((char *)mblock + MERGE_BLOCK_SIZE)); + + extra_len = rec_offs_extra_size(offsets); + rec_len = rec_offs_size(offsets); + + /* 1. 
Store the extra_len */ + storage_size = mach_write_compressed((byte *)dest_data, extra_len); + dest_data+=storage_size; + ut_ad(dest_data < ((char *)mblock + MERGE_BLOCK_SIZE)); + + /* 2. Store the record */ + memcpy(dest_data, rec - extra_len, rec_len); + dest_data+=rec_len; + ut_ad(dest_data < ((char *)mblock + MERGE_BLOCK_SIZE)); + + mblock->header.n_records++; + + /* Return next offset */ + return((char *)dest_data - (char *)mblock); +} + +/************************************************************************ +Read a record from the block */ +static +merge_rec_t* +row_merge_read_rec_from_block( +/*==========================*/ + /* out: record or NULL*/ + merge_block_t* mblock, /* in: memory block where to read */ + ulint* offset, /* in/out: offset where to read a record */ + mem_heap_t* heap, /* in: heap were this memory for this record + is allocated */ + dict_index_t* index) /* in: index record desriptor */ +{ + merge_rec_t* mrec; + char* from_data; + ulint extra_len; + ulint data_len; + ulint tmp_offset; + ulint storage_len; + rec_t* rec; + mem_heap_t* offset_heap = NULL; + ulint sec_offsets_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs = sec_offsets_; + + *sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_; + + ut_ad(mblock && offset && heap); + + tmp_offset = *offset; + + /* Find the position in the block where this data tuple is stored. + If we are at the start of the block, remember to add size of header + to the offset */ + + if (tmp_offset == 0) { + from_data = mblock->data; + } else { + from_data = ((char *)mblock + tmp_offset); + } + + ut_ad(from_data < ((char *)mblock + MERGE_BLOCK_SIZE)); + + mrec = mem_heap_alloc(heap, sizeof(merge_rec_t)); + + /* 1. Read the extra len and calculate its storage length */ + extra_len = mach_read_compressed((byte *)from_data); + storage_len = mach_get_compressed_size(extra_len); + from_data+=storage_len; + ut_ad(from_data < ((char *)mblock + MERGE_BLOCK_SIZE)); + + /* 2. Read the record */ + rec = (rec_t*)(from_data + extra_len); + mrec->rec = rec; + sec_offs = rec_get_offsets(mrec->rec, index, sec_offs, ULINT_UNDEFINED, + &offset_heap); + data_len = rec_offs_size(sec_offs); + ut_ad(rec_validate(rec, sec_offs)); + + from_data+=data_len; + ut_ad(from_data < ((char *)mblock + MERGE_BLOCK_SIZE)); + + /* Return also start offset of the next data tuple */ + *offset = ((char *)from_data - (char *)mblock); + + if (offset_heap) { + mem_heap_free(offset_heap); + } + + return(mrec); +} + +/***************************************************************** +Compare a merge record to another merge record. 
Returns:
+1) NULL if a unique index is to be created and the records are identical
+2) the first record if the first record is smaller than the second record
+3) the first record if the records are identical and the index type is not UNIQUE
+4) the second record if the first record is larger than the second record. */
+static
+merge_rec_t*
+row_merge_select(
+/*=============*/
+				/* out: record or NULL */
+	merge_rec_t*	mrec1,		/* in: first merge record to be
+					compared */
+	merge_rec_t*	mrec2,		/* in: second merge record to be
+					compared */
+	ulint*		offsets1,	/* in: first record offsets */
+	ulint*		offsets2,	/* in: second record offsets */
+	dict_index_t*	index,		/* in: index */
+	int*		selected)	/* in/out: selected record */
+{
+	int	cmp_res = 0;
+
+	ut_ad(mrec1 && mrec2 && offsets1 && offsets2 && index && selected);
+	ut_ad(rec_validate(mrec1->rec, offsets1));
+	ut_ad(rec_validate(mrec2->rec, offsets2));
+
+	cmp_res = cmp_rec_rec(mrec1->rec, mrec2->rec, offsets1,
+			offsets2, index);
+
+	if (cmp_res <= 0) {
+
+		if (cmp_res == 0 && (index->type & DICT_UNIQUE)) {
+			/* The records contain two identical keys and the
+			index should be unique. Thus, a duplicate key error
+			should be generated; return NULL */
+
+			return(NULL);
+		}
+
+		*selected = 1;
+
+		return(mrec1);
+	} else {
+		*selected = 2;
+
+		return(mrec2);
+	}
+}
+
+/*****************************************************************
+Merge sort for a linked list in memory.
+
+Merge sort takes the input list and makes log N passes along the list,
+and in each pass it combines each adjacent pair of small sorted lists
+into one larger sorted list. When only one pass is needed, the whole
+output list is sorted.
+
+In each pass, two lists of size block_size are merged into lists of
+size block_size*2. Initially block_size=1. The merge starts by pointing
+a temporary pointer list1 at the head of the list and also preparing
+an empty list list_tail to whose end we will add elements. Then:
+
+	1) If list1 is NULL we terminate this pass.
+
+	2) Otherwise, there is at least one element in the next
+	pair of block_size lists; therefore, increase the number of
+	merges performed in this pass.
+
+	3) Point another temporary pointer list2 at the same
+	place as list1. Iterate list2 by block_size elements
+	or until the end of the list. Let list1_size be the
+	number of elements stepped over (the length of the
+	first sublist).
+
+	4) Let list2_size = block_size. Now we merge the list
+	starting at list1, of length list1_size, with the list
+	starting at list2, of length at most list2_size.
+
+	5) So, as long as either list1 is non-empty (list1_size > 0)
+	or list2 is non-empty (list2_size > 0 and list2 pointing to
+	an element):
+
+		5.1) Select which list to take the next element from.
+		If either list is empty, we choose from the other one.
+		If both lists are non-empty, compare the first element
+		of each and choose the lower one.
+
+		5.2) Remove that element, tmp, from the start of its
+		list, by advancing list1 or list2 to the next element
+		and decreasing list1_size or list2_size.
+
+		5.3) Add tmp to the end of list_tail.
+
+	6) At this point, we have advanced list1 until it is where
+	list2 started out, and we have advanced list2 until it is
+	pointing at the next pair of block_size lists to merge.
+	Thus, set list1 to the value of list2 and go back to the
+	start of this loop.
+
+As soon as a pass like this is performed with only one merge, the
+algorithm terminates and the output list list_head is sorted. Otherwise,
+double the value of block_size and go back to the beginning.
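+
+To make the scheme above concrete, here is a minimal, self-contained
+sketch of the same bottom-up pass structure on a plain singly linked
+list of integers (illustration only: node, key and merge_sort_list are
+made-up names, not part of InnoDB; the input list is assumed non-empty,
+as in the real code below, which works on merge_rec_t lists and compares
+records via row_merge_select()/cmp_rec_rec()):
+
+	#include <stddef.h>		// for NULL
+
+	struct node { int key; struct node* next; };
+
+	static struct node*
+	merge_sort_list(struct node* head)	// head != NULL
+	{
+		unsigned	block_size = 1;
+
+		for (;;) {
+			struct node*	list1 = head;
+			struct node*	tail = NULL;
+			unsigned	n_merges = 0;
+
+			head = NULL;
+
+			while (list1) {
+				struct node*	list2 = list1;
+				unsigned	list1_size = 0;
+				unsigned	list2_size = block_size;
+				unsigned	i;
+
+				n_merges++;
+
+				// step list2 at most block_size nodes forward
+				for (i = 0; i < block_size && list2; i++) {
+					list1_size++;
+					list2 = list2->next;
+				}
+
+				// merge the two adjacent sorted sublists
+				while (list1_size > 0
+				       || (list2_size > 0 && list2)) {
+					struct node*	tmp;
+
+					if (list1_size == 0) {
+						tmp = list2;
+						list2 = list2->next;
+						list2_size--;
+					} else if (list2_size == 0 || !list2) {
+						tmp = list1;
+						list1 = list1->next;
+						list1_size--;
+					} else if (list1->key <= list2->key) {
+						tmp = list1;
+						list1 = list1->next;
+						list1_size--;
+					} else {
+						tmp = list2;
+						list2 = list2->next;
+						list2_size--;
+					}
+
+					if (tail) {
+						tail->next = tmp;
+					} else {
+						head = tmp;
+					}
+
+					tail = tmp;
+				}
+
+				list1 = list2;
+			}
+
+			tail->next = NULL;
+
+			if (n_merges <= 1) {
+				// a single merge => the list is sorted
+				return(head);
+			}
+
+			block_size *= 2;
+		}
+	}
+
+The real row_merge_sort_linked_list() below has the same structure, but
+operates on merge_rec_t nodes, sorts list->head in place, and returns 0
+when row_merge_select() reports a duplicate key for a unique index.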
*/ +static +ulint +row_merge_sort_linked_list( +/*=======================*/ + /* out: 1 or 0 in case of error */ + dict_index_t* index, /* in: index to be created */ + merge_rec_list_t* list) /* in: Pointer to head element */ +{ + merge_rec_t* list1; + merge_rec_t* list2; + merge_rec_t* tmp; + merge_rec_t* list_head; + merge_rec_t* list_tail; + ulint block_size; + ulint num_of_merges; + ulint list1_size; + ulint list2_size; + ulint i; + mem_heap_t* offset_heap = NULL; + ulint sec_offsets1_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs1 = sec_offsets1_; + ulint sec_offsets2_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs2 = sec_offsets2_; + + ut_ad(list && list->head && index); + + *sec_offsets1_ = (sizeof sec_offsets1_) / sizeof *sec_offsets1_; + *sec_offsets2_ = (sizeof sec_offsets2_) / sizeof *sec_offsets2_; + + block_size = 1; /* We start from block size 1 */ + + list_head = list->head; + + for (;;) { + list1 = list_head; + list_head = NULL; + list_tail = NULL; + num_of_merges = 0; /* We count number of merges we do in + this pass */ + + while (list1) { + num_of_merges++; + + list2 = list1; + list1_size = 0; + + /* Step at most block_size elements along from + list2. */ + + for (i = 0; i < block_size; i++) { + list1_size++; + list2 = list2->next; + + if (!list2) { + break; + } + } + + list2_size = block_size; + + /* If list2 is not NULL, we have two lists to merge. + Otherwice, we have a sorted list. */ + + while (list1_size > 0 || (list2_size > 0 && list2)) { + /* Merge sort two lists by deciding whether + next element of merge comes from list1 or + list2. */ + + if (list1_size == 0) { + /* First list is empty, next element + must come from the second list. */ + + tmp = list2; + list2 = list2->next; + list2_size--; + } else if (list2_size == 0 || !list2) { + /* Second list is empty, next element + must come from the first list. */ + + tmp = list1; + list1 = list1->next; + list1_size--; + } else { + int selected = 1; + + sec_offs1 = rec_get_offsets(list1->rec, + index, + sec_offs1, + ULINT_UNDEFINED, + &offset_heap); + + sec_offs2 = rec_get_offsets(list2->rec, + index, + sec_offs2, + ULINT_UNDEFINED, + &offset_heap); + + + tmp = row_merge_select(list1, + list2, + sec_offs1, + sec_offs2, + index, + &selected); + + if (UNIV_UNLIKELY(tmp == NULL)) { + + if (offset_heap) { + mem_heap_free( + offset_heap); + } + + return(0); + } + + if (selected == 1) { + list1 = list1->next; + list1_size--; + } else { + list2 = list2->next; + list2_size--; + } + } + + /* Add selected element to the merged list */ + + if (list_tail) { + list_tail->next = tmp; + } else { + list_head = tmp; + } + + list_tail = tmp; + } + + /* Now we have processed block_size items from list1. 
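+	That is, list2 now points just past the second sublist, i.e. at
+	the first element of the next pair of block_size sublists (or is
+	NULL at the end of the list), so assigning it to list1 below
+	implements step 6 of the algorithm description above.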
*/ + + list1 = list2; + } + + list_tail->next = NULL; + + /* If we have done oly one merge, we have created a sorted + list */ + + if (num_of_merges <= 1) { + list->head = list_head; + + if (offset_heap) { + mem_heap_free(offset_heap); + } + + return(1); + } else { + /* Otherwise merge lists twice the size */ + block_size *= 2; + } + } +} + +/***************************************************************** +Create and initialize record list used for in-memory merge sort */ +static +merge_rec_list_t* +row_merge_create_list(void) +/*=======================*/ + /* out: pointer to list */ +{ + merge_rec_list_t* list_header; + mem_heap_t* heap = NULL; + + /* Create list header */ + heap = mem_heap_create((MERGE_BLOCK_SIZE + sizeof(merge_rec_list_t))); + + list_header = mem_heap_alloc(heap, sizeof(merge_rec_list_t)); + + list_header->head = NULL; + list_header->tail = NULL; + list_header->n_records = 0; + list_header->total_size = sizeof(merge_rec_list_t); + list_header->heap = heap; + + return(list_header); +} + +/***************************************************************** +Add one record to the merge list */ +static +void +row_merge_list_add( +/*===============*/ + merge_rec_t* m_rec, /* in: record to be + inserted to the list */ + ulint rec_len, /* in: record length */ + merge_rec_list_t* list_header) /* in/out: list header */ +{ + ut_ad(m_rec && list_header); + + m_rec->next = NULL; + list_header->total_size+=rec_len; + + if (list_header->tail == NULL) { + + list_header->tail = list_header->head = m_rec; + } else { + list_header->tail->next = m_rec; + list_header->tail = m_rec; + } + + list_header->n_records++; +} + +/***************************************************************** +Write records from the list to the merge block */ +static +merge_rec_list_t* +row_merge_write_list_to_block( +/*==========================*/ + /* out: pointer to a new list + where rest of the items are stored */ + merge_rec_list_t* list, /* in: Record list */ + merge_block_t* output, /* in: Pointer to block */ + dict_index_t* index) /* in: Record descriptor */ +{ + ulint offset = 0; + merge_rec_t* m_rec = NULL; + merge_rec_list_t* new_list = NULL; + mem_heap_t* heap = NULL; + ulint sec_offsets_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs = sec_offsets_; + + ut_ad(list && output && index); + + *sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_; + output->header.n_records = 0; + + /* Write every record which fits to block to the block */ + + m_rec = list->head; + + while (m_rec) { + + sec_offs = rec_get_offsets(m_rec->rec, index, sec_offs, + ULINT_UNDEFINED, &heap); + + if (!row_merge_rec_fits_to_block(sec_offs, offset)) { + break; + } + + offset = row_merge_store_rec_to_block(m_rec->rec, + sec_offs, output, offset); + + m_rec = m_rec->next; + list->n_records--; + } + + /* Now create a new list and store rest of the records there. + Note that records must be copied because we deallocate memory + allocated for the original list. 
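+	Without the copy, the leftover merge_rec_t records would still
+	point into list->heap after it is freed below, leaving them
+	dangling.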
*/ + + new_list = row_merge_create_list(); + + while (m_rec) { + rec_t* rec; + merge_rec_t* n_rec; + void* buff; + + *sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_; + + sec_offs = rec_get_offsets(m_rec->rec, index, sec_offs, + ULINT_UNDEFINED, &heap); + + buff = mem_heap_alloc(new_list->heap, + rec_offs_size(sec_offs)); + + n_rec = mem_heap_alloc(new_list->heap, sizeof(merge_rec_t)); + rec = rec_copy(buff, m_rec->rec, sec_offs); + n_rec->rec = rec; + row_merge_list_add(n_rec, rec_offs_size(sec_offs), new_list); + m_rec = m_rec->next; + } + + /* We can now free original list */ + mem_heap_free(list->heap); + + if (heap) { + mem_heap_free(heap); + } + + return(new_list); +} + +#ifdef UNIV_DEBUG +/************************************************************************* +Validate contents of the block */ +static +ibool +row_merge_block_validate( +/*=====================*/ + merge_block_t* block, /* in: block to be printed */ + dict_index_t* index) /* in: record descriptor */ +{ + merge_rec_t* mrec; + ulint offset = 0; + ulint n_recs = 0; + mem_heap_t* heap; + ulint sec_offsets1_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs1 = sec_offsets1_; + *sec_offsets1_ = (sizeof sec_offsets1_) / sizeof *sec_offsets1_; + + ut_a(block && index); + + heap = mem_heap_create(1024); + + fprintf(stderr, + "Block validate %lu records, " + "offset (%lu %lu), next (%lu %lu)\n", + block->header.n_records, + ut_dulint_get_low(block->header.offset), + ut_dulint_get_high(block->header.offset), + ut_dulint_get_low(block->header.next), + ut_dulint_get_high(block->header.next)); + + ut_a(block->header.n_records > 0); + + for (n_recs = 0; n_recs < block->header.n_records; n_recs++) { + + mrec = row_merge_read_rec_from_block(block, &offset, heap, + index); + + sec_offs1 = rec_get_offsets(mrec->rec, index, sec_offs1, + ULINT_UNDEFINED, &heap); + + ut_a(rec_validate(mrec->rec, sec_offs1)); + + mem_heap_empty(heap); + } + + mem_heap_free(heap); + + return(TRUE); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************* +Merge two blocks resulting a two sorted blocks. */ +static +merge_block_t* +row_merge_block_merge( +/*==================*/ + /* out: Pointer to first sorted block + or NULL in case of error */ + merge_block_t* block1, /* in: First block to be merged */ + merge_block_t** block2, /* in/out: Second block to be merged. 
+ Note that contents of the second sorted + block is returned with this parameter.*/ + dict_index_t* index) /* in: Index to be created */ +{ + merge_block_t* new_block1; + merge_block_t* new_block2; + merge_block_t* tmp; + merge_rec_t* mrec1; + merge_rec_t* mrec2; + ulint nth_rec1 = 0; + ulint nth_rec2 = 0; + ulint offset1 = 0; + ulint offset2 = 0; + ulint offset3 = 0; + ulint offset4 = 0; + ibool fits_to_new = TRUE; + int selected = 0; + mem_heap_t* heap; + mem_heap_t* offset_heap = NULL; + ulint sec_offsets1_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs1 = sec_offsets1_; + ulint sec_offsets2_[REC_OFFS_SMALL_SIZE]; + ulint* sec_offs2 = sec_offsets2_; + ulint* rec_offsets; + + ut_ad(block1 && block2 && *block2 && index); + ut_ad(row_merge_block_validate(block1, index)); + ut_ad(row_merge_block_validate(*block2, index)); + + *sec_offsets1_ = (sizeof sec_offsets1_) / sizeof *sec_offsets1_; + *sec_offsets2_ = (sizeof sec_offsets2_) / sizeof *sec_offsets2_; + + new_block1 = row_merge_block_create(); + new_block2 = row_merge_block_create(); + tmp = *block2; + heap = mem_heap_create(256); + + /* Copy block offset and next block offset to new blocks */ + + new_block1->header.offset = block1->header.offset; + new_block1->header.next = block1->header.next; + new_block2->header.offset = tmp->header.offset; + new_block2->header.next = tmp->header.next; + + /* Merge all records from both blocks */ + + while (nth_rec1 < block1->header.n_records || + nth_rec2 < tmp->header.n_records) { + + mrec1 = mrec2 = NULL; + selected = 0; + mem_heap_empty(heap); + + if (nth_rec1 < block1->header.n_records && + nth_rec2 >= tmp->header.n_records) { + + /* If the second block is empty read record from + the first block */ + + mrec1 = row_merge_read_rec_from_block( + block1, &offset1, heap, index); + + sec_offs1 = rec_get_offsets( + mrec1->rec, index, sec_offs1, ULINT_UNDEFINED, + &offset_heap); + + rec_offsets = sec_offs1; + + ut_ad(rec_validate(mrec1->rec, sec_offs1)); + + nth_rec1++; + + } else if (nth_rec2 < tmp->header.n_records && + nth_rec1 >= block1->header.n_records) { + + /* If the first block is empty read data tuple from + the second block */ + + mrec1 = row_merge_read_rec_from_block( + tmp, &offset2, heap, index); + + sec_offs1 = rec_get_offsets( + mrec1->rec, index, sec_offs1, ULINT_UNDEFINED, + &offset_heap); + + rec_offsets = sec_offs1; + + ut_ad(rec_validate(mrec1->rec, sec_offs1)); + + nth_rec2++; + } else { + ulint tmp_offset1 = offset1; + ulint tmp_offset2 = offset2; + + /* Both blocks contain record and thus they must + be compared */ + + mrec1 = row_merge_read_rec_from_block( + block1, &offset1, heap, index); + + sec_offs1 = rec_get_offsets( + mrec1->rec, index, sec_offs1, ULINT_UNDEFINED, + &offset_heap); + + ut_ad(rec_validate(mrec1->rec, sec_offs1)); + + mrec2 = row_merge_read_rec_from_block( + tmp, &offset2, heap, index); + + sec_offs2 = rec_get_offsets( + mrec2->rec, index, sec_offs2, ULINT_UNDEFINED, + &offset_heap); + + ut_ad(rec_validate(mrec2->rec, sec_offs2)); + + mrec1 = row_merge_select( + mrec1, mrec2, sec_offs1, sec_offs2, index, + &selected); + + /* If selected record is null we have duplicate key + on unique index */ + + if (mrec1 == NULL) { + goto error_handling; + } + + /* Addvance records on the block where record was + selected and set offset back on this record + on the block where record was not selected. 
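+			For example, when selected == 1, nth_rec1 and offset1
+			remain advanced past mrec1, while offset2 is restored
+			to tmp_offset2 so that mrec2 is simply re-read from the
+			second block and compared again on the next iteration.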
*/ + + if (selected == 1) { + rec_offsets = sec_offs1; + nth_rec1++; + offset2 = tmp_offset2; + } else { + rec_offsets = sec_offs2; + nth_rec2++; + offset1 = tmp_offset1; + } + } + + ut_ad(mrec1); + ut_ad(rec_validate(mrec1->rec, rec_offsets)); + + /* If the first output block is not yet full test whether this + new data tuple fits to block. If not this new data tuple must + be inserted to second output block */ + + if (fits_to_new) { + fits_to_new = row_merge_rec_fits_to_block( + rec_offsets, offset3); + } + + if (fits_to_new) { + offset3 = row_merge_store_rec_to_block( + mrec1->rec, rec_offsets, new_block1, offset3); + } else { + offset4 = row_merge_store_rec_to_block( + mrec1->rec, rec_offsets, new_block2, offset4); + } + + /* TODO: If we are using variable length keys, then in + some cases these keys do not fit to two empty blocks + in a different order. Therefore, some empty space is + left to every block. However, it has not been prooven + that this empty space is enought in all cases. Therefore, + here these overloaded records should be put on another + block. */ + } + + /* Free memory from old blocks and return pointers to new blocks */ + + if (offset_heap) { + mem_heap_free(offset_heap); + } + + mem_heap_free(heap); + mem_free(block1); + mem_free(tmp); + + ut_ad(row_merge_block_validate(new_block1, index)); + ut_ad(row_merge_block_validate(new_block2, index)); + + *block2 = new_block2; + + return(new_block1); + +error_handling: + /* Duplicate key was found and unique key was requested. Free all + allocated memory and return NULL */ + + if (offset_heap) { + mem_heap_free(offset_heap); + } + + mem_heap_free(heap); + mem_free(block1); + mem_free(tmp); + mem_free(new_block1); + mem_free(new_block2); + + return(NULL); +} + +/***************************************************************** +Merge sort for linked list in the disk. + +Merge sort takes the input list and makes log N passes along +the list and in each pass it combines each adjacent pair of +small sorted lists into one larger sorted list. When only a one +pass is needed the whole output list must be sorted. + +Linked list resides at the disk where every block represents a +item in the linked list and these items are single linked together +with next offset found from block header. Offset is calculated +from the start of the file. Thus whenever next item in the list +is requested this item is read from the disk. Similarly every +item is witten back to the disk when we have sorted two blocks +in the memory. + +In each pass, two lists of size block_size are merged into lists of +size block_size*2. Initially block_size=1. Merge starts by pointing +a temporary pointer list1 at the head of the list and also preparing +an empty list list_tail which we will add elements to the end. Then: + + 1) If block1 is NULL we terminate this pass. + + 2) Otherwise, there is at least one element in the next + pair of block_size lists therefore, increase the number of + merges performed in this pass. + + 3) Point another temporary pointer list2 as the same + place as list1. Iterate list2 by block_size elements + or until the end of the list. Let the list_size1 be the + number of elements in the list2. + + 4) Let list_size1=merge_size. Now we merge list starting at + list1 of length list_size2 with a list starting at list2 of + length at most list_size1. 
+ + 5) So, as long as either the list1 is non-empty (list_size1) + or the list2 is non-empty (list_size2 and list2 pointing to + a element): + + 5.1) Select which list to take the next element from. + If either lists is empty, we choose from the other one. + If both lists are non-empty, compare the first element + of each and choose the lower one. + + 5.2) Remove that element, tmp, from the start of its + lists, by advancing list1 or list2 to next element + and decreasing list1_size or list2_size. + + 5.3) Add tmp to the end of the list_tail + + 6) At this point, we have advanced list1 until it is where + list2 started out and we have advanced list2 until it is + pointing at the next pair of block_size lists to merge. + Thus, set list1 to the value of list2 and go back to the + start of this loop. + +As soon as a pass like this is performed with only one merge, the +algorithm terminates. Otherwise, double the value of block_size +and go back to the beginning. */ + +dulint +row_merge_sort_linked_list_in_disk( +/*===============================*/ + /* out: offset to first block in + the list or ut_dulint_max in + case of error */ + dict_index_t* index, /* in: index to be created */ + os_file_t file, /* in: File handle */ + int* error) /* out: 0 or error */ +{ + merge_block_t* block1; + merge_block_t* block2; + merge_block_t* tmp; + merge_block_t* backup1; + merge_block_t* backup2; + merge_block_header_t header; + merge_file_t output; + ulint block_size; + ulint num_of_merges; + ulint list1_size; + ulint list2_size; + ulint i; + dulint list_head; + dulint list_tail; + dulint offset; + ibool list_is_empty; + int selected; + + ut_ad(index); + + /* Allocate memory for blocks */ + block1 = row_merge_block_create(); + block2 = row_merge_block_create(); + backup1 = block1; + backup2 = block2; + tmp = NULL; + + list_head = ut_dulint_create(0, 0); + list_tail = ut_dulint_create(0, 0); + + output.file = file; + + block_size = 1; /* We start from block size 1 */ + + for (;;) { + tmp = NULL; + block1 = backup1; + + row_merge_block_read(file, block1, list_head); + ut_ad(row_merge_block_validate(block1, index)); + list_head = ut_dulint_create(0, 0); + list_tail = ut_dulint_create(0, 0); + list_is_empty = TRUE; + num_of_merges = 0; /* We count number of merges we do in + this pass */ + + while (block1) { + num_of_merges++; + + header = block1->header; + offset = header.offset; + list1_size = 0; + + /* Count how many list elements we have in the list. */ + + for (i = 0; i < block_size; i++) { + list1_size++; + + /* Here read only the header to iterate the + list in the disk. */ + + row_merge_block_header_read(file, &header, + offset); + + offset = header.next; + + /* If the offset is zero we have arrived to the + end of disk list */ + + if (ut_dulint_is_zero(offset)) { + break; + } + } + + list2_size = block_size; + + /* If offset is zero we have reached end of the list in + the disk. */ + + if (ut_dulint_is_zero(offset)) { + block2 = NULL; + } else { + block2 = backup2; + row_merge_block_read(file, block2, offset); + ut_ad(row_merge_block_validate(block2, index)); + } + + /* If list2 is not empty, we have two lists to merge. + Otherwice, we have a sorted list. */ + + while (list1_size > 0 || (list2_size > 0 && block2)) { + /* Merge sort two lists by deciding whether + next element of merge comes from list1 or + list2. */ + + selected = 0; + tmp = NULL; + + if (list1_size == 0) { + /* First list is empty, next element + must come from the second list. 
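+				Note that in this on-disk variant the element
+				taken is a whole block rather than a single
+				record; the block is already internally sorted,
+				so below it is simply written back to its own
+				offset.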
*/ + + tmp = block2; + + if (ut_dulint_is_zero( + block2->header.next)) { + block2 = NULL; + } + + list2_size--; + selected = 2; + + } else if (list2_size == 0 || !block2) { + /* Second list is empty, next record + must come from the first list. */ + + tmp = block1; + list1_size--; + selected = 1; + + } else { + /* Both lists contain a block and we + need to merge records on these block */ + + tmp = row_merge_block_merge(block1, + &block2, index); + + block1 = tmp; + backup1 = tmp; + backup2 = block2; + + if (tmp == NULL) { + goto error_handling; + } + + list1_size--; + selected = 1; + } + + /* Store head and tail offsets of the disk list. + Note that only records on the blocks are + changed not the order of the blocks in the + disk. */ + + if (list_is_empty) { + list_is_empty = FALSE; + list_head = tmp->header.offset; + } + + list_tail = tmp->header.offset; + + ut_ad(row_merge_block_validate(tmp, index)); + + row_merge_block_write( + file, tmp, tmp->header.offset); + + + /* Now we can read the next record from the + selected list if it contains more records */ + + if (!ut_dulint_is_zero(tmp->header.next)) { + row_merge_block_read(file, tmp, + tmp->header.next); + } else { + if (selected == 2) { + block2 = NULL; + } + } + } + + /* Now we have processed block_size items from the disk. + Swap blocks using pointers. */ + + if (block2) { + tmp = block2; + block2 = block1; + block1 = tmp; + backup1 = block1; + backup2 = block2; + } else { + block1 = NULL; + } + } + + + /* If we have done oly one merge, we have created a sorted + list */ + + if (num_of_merges <= 1) { + + mem_free(backup1); + mem_free(backup2); + + return(list_head); + } else { + /* Otherwise merge lists twice the size */ + block_size *= 2; + } + } + +error_handling: + + /* In the sort phase we can have duplicate key error, inform this to + upper layer */ + list_head = ut_dulint_max; + *error = DB_DUPLICATE_KEY; + + return(list_head); +} + +/************************************************************************ +Merge sort linked list in the memory and store part of the linked +list into a block and write this block to the disk. */ +static +ulint +row_merge_sort_and_store( +/*=====================*/ + /* out: 1 or 0 in case of error */ + dict_index_t* index, /* in: Index */ + merge_file_t* file, /* in: File where to write index + entries */ + merge_block_t* block, /* in/out: Block where to store + the list */ + merge_rec_list_t** list) /* in/out: Pointer to the list */ +{ + ut_ad(index && file && block && list); + + /* Firstly, merge sort linked list in the memory */ + if (!row_merge_sort_linked_list(index, *list)) { + return(0); + } + + /* Secondly, write part of the linked list to the block */ + *list = row_merge_write_list_to_block(*list, block, index); + + ut_ad(row_merge_block_validate(block, index)); + + /* Next block will be written directly behind this one. This will + create a 'linked list' of blocks to the disk. 
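+	For example, if file->offset currently equals 2 * MERGE_BLOCK_SIZE,
+	this block is stamped with offset = 2 * MERGE_BLOCK_SIZE and
+	next = 3 * MERGE_BLOCK_SIZE, written at that offset, and
+	file->offset is then advanced to 3 * MERGE_BLOCK_SIZE for the
+	block that follows.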
*/ + + block->header.offset = file->offset; + + block->header.next= ut_dulint_add(file->offset, MERGE_BLOCK_SIZE); + + /* Thirdly, write block to the disk */ + row_merge_block_write(file->file, block, file->offset); + + file->offset= ut_dulint_add(file->offset, MERGE_BLOCK_SIZE); + + return(1); +} + +#ifdef UNIV_DEBUG_INDEX_CREATE +/************************************************************************ +Pretty print data tuple */ +static +void +row_merge_dtuple_print( +/*===================*/ + FILE* f, /* in: output stream */ + dtuple_t* dtuple) /* in: data tuple */ +{ + ulint n_fields; + ulint i; + + ut_ad(f && dtuple); + + n_fields = dtuple_get_n_fields(dtuple); + + fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields); + + for (i = 0; i < n_fields; i++) { + dfield_t* dfield; + + dfield = dtuple_get_nth_field(dtuple, i); + + fprintf(f, "%lu: ", (ulong) i); + + if (dfield->len != UNIV_SQL_NULL) { + dfield_print_also_hex(dfield); + } else { + fputs(" SQL NULL", f); + } + + putc(';', f); + } + + putc('\n', f); + ut_ad(dtuple_validate(dtuple)); +} +#endif /* UNIV_DEBUG_INDEX_CREATE */ + +/************************************************************************ +Reads clustered index of the table and create temporary files +containing index entries for indexes to be built. */ + +ulint +row_merge_read_clustered_index( +/*===========================*/ + /* out: DB_SUCCESS if successfull, + or ERROR code */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: table where index is created */ + dict_index_t** index, /* in: indexes to be created */ + merge_file_t** files, /* in: Files where to write index + entries */ + ulint num_of_idx) /* in: number of indexes to be + created */ +{ + dict_index_t* clust_index; /* Cluster index */ + merge_rec_t* new_mrec; /* New merge record */ + mem_heap_t* row_heap; /* Heap memory to create + clustered index records */ + mem_heap_t* heap; /* Memory heap for + record lists and offsets */ + merge_block_t* block; /* Merge block where records + are stored for memory sort and + then written to the disk */ + merge_rec_list_t** merge_list; /* Temporary list for records*/ + merge_block_header_t* header; /* Block header */ + rec_t* rec; /* Record in the persistent + cursor*/ + btr_pcur_t pcur; /* Persistent cursor on the + cluster index */ + mtr_t mtr; /* Mini transaction */ + ibool more_records_exists; /* TRUE if we reached end of + the cluster index */ + ulint err = DB_SUCCESS; /* Return code */ + ulint idx_num = 0; /* Index number */ + ulint n_blocks = 0; /* Number of blocks written + to disk */ + ulint sec_offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* sec_offs = sec_offsets_; + + *sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_; + + trx->op_info="reading cluster index"; + + ut_a(trx && table && index && files); + + /* Create block where index entries are stored */ + block = row_merge_block_create(); + + /* Create and initialize memory for record lists */ + + heap = mem_heap_create(256); + merge_list = mem_heap_alloc(heap, num_of_idx * sizeof *merge_list); + + for (idx_num = 0; idx_num < num_of_idx; idx_num++) { + merge_list[idx_num] = row_merge_create_list(); + } + + mtr_start(&mtr); + + /* Find the clustered index and create a persistent cursor + based on that. 
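+	The clustered index is always the first index in the table's list
+	of indexes, and the cursor is opened at the leftmost leaf record
+	so that every row is visited.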
*/ + + clust_index = dict_table_get_first_index(table); + + btr_pcur_open_at_index_side( + TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); + + row_heap = mem_heap_create(512); + + /* Get first record from the clustered index */ + rec = btr_pcur_get_rec(&pcur); + + /* Iterate all records in the clustered index */ + while (rec) { + dtuple_t* row; + row_ext_t* ext; + + /* Infimum and supremum records are skipped */ + + if (!page_rec_is_user_rec(rec)) { + + goto next_record; + + /* We don't count the delete marked records as "Inserted" */ + } else if (!rec_get_deleted_flag(rec, page_rec_is_comp(rec))) { + + srv_n_rows_inserted++; + } + + /* Build row based on clustered index */ + mem_heap_empty(row_heap); + + row = row_build(ROW_COPY_POINTERS, + clust_index, rec, NULL, &ext, row_heap); + + /* If the user has requested the creation of several indexes + for the same table. We build all index entries in a single + pass over the cluster index. */ + + for (idx_num = 0; idx_num < num_of_idx; idx_num++) { + + dtuple_t* index_tuple; + + index_tuple = row_build_index_entry( + row, ext, + index[idx_num], merge_list[idx_num]->heap); + +#ifdef UNIV_DEBUG_INDEX_CREATE + row_merge_dtuple_print(stderr, index_tuple); +#endif + + new_mrec = row_merge_rec_create( + index_tuple, + ext ? ext->ext : NULL, ext ? ext->n_ext : 0, + index[idx_num], merge_list[idx_num]->heap); + + sec_offs = rec_get_offsets( + new_mrec->rec, index[idx_num], sec_offs, + ULINT_UNDEFINED, &heap); + + /* Add data tuple to linked list of data tuples */ + + row_merge_list_add( + new_mrec, rec_offs_size(sec_offs), + merge_list[idx_num]); + + /* If we have enought data tuples to form a block + sort linked list and store it to the block and + write this block to the disk. Note that not all + data tuples in the list fit to the block.*/ + + if (merge_list[idx_num]->total_size >= + MERGE_BLOCK_SIZE) { + + if (!row_merge_sort_and_store( + index[idx_num], + files[idx_num], + block, + &(merge_list[idx_num]))) { + + trx->error_key_num = idx_num; + err = DB_DUPLICATE_KEY; + goto error_handling; + } + + n_blocks++; + files[idx_num]->num_of_blocks++; + } + } + + +next_record: + /* Persistent cursor has to be stored and mtr committed + if we move to a new page in cluster index. */ + + if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { + btr_pcur_store_position(&pcur, &mtr); + mtr_commit(&mtr); + mtr_start(&mtr); + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + } + + more_records_exists = btr_pcur_move_to_next(&pcur, &mtr); + + /* If no records are left we have created file for merge + sort */ + + if (more_records_exists == TRUE) { + rec = btr_pcur_get_rec(&pcur); + } else { + rec = NULL; + } + } + + /* Now we have to write all remaining items in the list to + blocks and write these blocks to the disk */ + + for (idx_num = 0; idx_num < num_of_idx; idx_num++) { + + /* While we have items in the list write them + to the block */ + + if (merge_list[idx_num]->n_records > 0) { + + /* Next block will be written directly + behind this one. This will create a + 'linked list' of blocks to the disk. 
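+		This mirrors the header bookkeeping done in
+		row_merge_sort_and_store() for the final, partially
+		filled block of each index.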
*/ + + block->header.offset = files[idx_num]->offset; + + block->header.next= ut_dulint_add( + files[idx_num]->offset, MERGE_BLOCK_SIZE); + + if (!row_merge_sort_and_store( + index[idx_num], + files[idx_num], + block, + &(merge_list[idx_num]))) { + + trx->error_key_num = idx_num; + err = DB_DUPLICATE_KEY; + goto error_handling; + } + + files[idx_num]->num_of_blocks++; + n_blocks++; + } + + /* To the last block header we set (0, 0) to next + offset to mark the end of the list. */ + + header = &(block->header); + header->next = ut_dulint_create(0, 0); + + row_merge_block_header_write( + files[idx_num]->file, header, header->offset); + } + +#ifdef UNIV_DEBUG_INDEX_CREATE + fprintf(stderr, "Stored %lu blocks\n", n_blocks); +#endif + +error_handling: + + /* Cleanup resources */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(row_heap); + mem_free(block); + + for (idx_num = 0; idx_num < num_of_idx; idx_num++) { + mem_heap_free(merge_list[idx_num]->heap); + } + + mem_heap_free(heap); + + trx->op_info=""; + + return(err); +} + +/************************************************************************ +Read sorted file containing index data tuples and insert these data +tuples to the index */ + +ulint +row_merge_insert_index_tuples( +/*==========================*/ + /* out: 0 or error number */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: index */ + dict_table_t* table, /* in: table */ + os_file_t file, /* in: file handle */ + dulint offset) /* in: offset where to start + reading */ +{ + merge_block_t* block; + que_thr_t* thr; + ins_node_t* node; + mem_heap_t* rec_heap; + mem_heap_t* dtuple_heap; + mem_heap_t* graph_heap; + ulint error = DB_SUCCESS; + ibool more_records = TRUE; + ibool was_lock_wait = FALSE; + + ut_ad(trx && index && table); + + /* We use the insert query graph as the dummy graph + needed in the row module call */ + + trx->op_info = "inserting index entries"; + + graph_heap = mem_heap_create(512); + node = ins_node_create(INS_DIRECT, table, graph_heap); + + thr = pars_complete_graph_for_exec(node, trx, graph_heap); + + que_thr_move_to_run_state_for_mysql(thr, trx); + + block = row_merge_block_create(); + rec_heap = mem_heap_create(128); + dtuple_heap = mem_heap_create(256); + row_merge_block_read(file, block, offset); + + while (more_records) { + ulint n_rec; + ulint tuple_offset; + + ut_ad(row_merge_block_validate(block, index)); + tuple_offset = 0; + + for (n_rec = 0; n_rec < block->header.n_records; n_rec++) { + merge_rec_t* mrec; + dtuple_t* dtuple; + + mrec = row_merge_read_rec_from_block( + block, &tuple_offset, rec_heap,index); + + if (!rec_get_deleted_flag(mrec->rec, 0)) { + + dtuple = row_rec_to_index_entry( + ROW_COPY_POINTERS, + index, mrec->rec, dtuple_heap); + + node->row = dtuple; + node->table = table; + node->trx_id = trx->id; + + ut_ad(dtuple_validate(dtuple)); + +#ifdef UNIV_DEBUG_INDEX_CREATE + row_merge_dtuple_print(stderr, dtuple); +#endif + +run_again: + thr->run_node = thr; + thr->prev_node = thr->common.parent; + + error = row_ins_index_entry( + index, dtuple, NULL, 0, thr); + + mem_heap_empty(rec_heap); + mem_heap_empty(dtuple_heap); + + if (error != DB_SUCCESS) { + goto error_handling; + } + } + } + + offset = block->header.next; + + /* If we have reached the end of the disk list we have + inserted all of the index entries to the index. 
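+		The last block written during the index build gets a (0, 0)
+		'next' offset in its header, and that is what terminates
+		this loop.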
*/ + + if (ut_dulint_is_zero(offset)) { + more_records = FALSE; + } else { + row_merge_block_read(file, block, offset); + } + } + + que_thr_stop_for_mysql_no_error(thr, trx); + que_graph_free(thr->graph); + + trx->op_info=""; + + mem_free(block); + mem_heap_free(rec_heap); + mem_heap_free(dtuple_heap); + + return(error); + +error_handling: + + thr->lock_state = QUE_THR_LOCK_ROW; + trx->error_state = error; + que_thr_stop_for_mysql(thr); + thr->lock_state = QUE_THR_LOCK_NOLOCK; + + was_lock_wait = row_mysql_handle_errors(&error, trx, thr, NULL); + + if (was_lock_wait) { + goto run_again; + } + + que_graph_free(thr->graph); + + trx->op_info = ""; + + if (block) { + mem_free(block); + } + + mem_heap_free(rec_heap); + mem_heap_free(dtuple_heap); + + return(error); +} + +/************************************************************************* +Remove a index from system tables */ + +ulint +row_merge_remove_index( +/*===================*/ + /* out: error code or DB_SUCCESS */ + dict_index_t* index, /* in: index to be removed */ + dict_table_t* table, /* in: table */ + trx_t* trx) /* in: transaction handle */ +{ + que_thr_t* thr; + que_t* graph; + mem_heap_t* sql_heap; + ulint err; + char* sql; + ibool dict_lock = FALSE; + + /* We use the private SQL parser of Innobase to generate the + query graphs needed in deleting the dictionary data from system + tables in Innobase. Deleting a row from SYS_INDEXES table also + frees the file segments of the B-tree associated with the index. */ + + static const char str1[] = + "PROCEDURE DROP_INDEX_PROC () IS\n" + "indexid CHAR;\n" + "tableid CHAR;\n" + "table_id_high INT;\n" + "table_id_low INT;\n" + "index_id_high INT;\n" + "index_id_low INT;\n" + "BEGIN\n" + "index_id_high := %lu;\n" + "index_id_low := %lu;\n" + "table_id_high := %lu;\n" + "table_id_low := %lu;\n" + "indexid := CONCAT(TO_BINARY(index_id_high, 4)," + " TO_BINARY(index_id_low, 4));\n" + "tableid := CONCAT(TO_BINARY(table_id_high, 4)," + " TO_BINARY(table_id_low, 4));\n" + "DELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n" + "DELETE FROM SYS_INDEXES WHERE ID = indexid\n" + " AND TABLE_ID = tableid;\n" + "END;\n"; + + ut_ad(index && table && trx); + + trx_start_if_not_started(trx); + trx->op_info = "dropping index"; + + sql_heap = mem_heap_create(256); + + sql = mem_heap_alloc(sql_heap, sizeof(str1) + 80); + + sprintf(sql, "%s", str1); + + sprintf(sql, sql, ut_dulint_get_high(index->id), + ut_dulint_get_low(index->id), ut_dulint_get_high(table->id), + ut_dulint_get_low(table->id)); + + if (trx->dict_operation_lock_mode == 0) { + row_mysql_lock_data_dictionary(trx); + dict_lock = TRUE; + } + + graph = pars_sql(NULL, sql); + + ut_a(graph); + mem_heap_free(sql_heap); + + graph->trx = trx; + trx->graph = NULL; + + graph->fork_type = QUE_FORK_MYSQL_INTERFACE; + + ut_a(thr = que_fork_start_command(graph)); + + que_run_threads(thr); + + err = trx->error_state; + + if (err != DB_SUCCESS) { + row_mysql_handle_errors(&err, trx, thr, NULL); + + ut_error; + } else { + /* Replace this index with another equivalent index for all + foreign key constraints on this table where this index + is used */ + + dict_table_replace_index_in_foreign_list(table, index); + + if (trx->dict_redo_list) { + dict_redo_remove_index(trx, index); + } + + dict_index_remove_from_cache(table, index); + } + + que_graph_free(graph); + + if (dict_lock) { + row_mysql_unlock_data_dictionary(trx); + } + + trx->op_info = ""; + + return(err); +} + +/************************************************************************* +Allocate and 
initialize memory for a merge file structure */ + +merge_file_t* +row_merge_create_file_structure( +/*============================*/ + /* out: pointer to merge file + structure */ + mem_heap_t* heap) /* in: heap where merge file structure + is allocated */ +{ + merge_file_t* merge_file; + + merge_file = (merge_file_t*) mem_heap_alloc(heap, sizeof(merge_file_t)); + + merge_file->file = innobase_mysql_tmpfile(); + + merge_file->offset = ut_dulint_create(0, 0); + merge_file->num_of_blocks = 0; + + return(merge_file); +} + +#ifdef UNIV_DEBUG_INDEX_CREATE +/************************************************************************* +Print definition of a table in the dictionary */ + +void +row_merge_print_table( +/*==================*/ + dict_table_t* table) /* in: table */ +{ + dict_table_print(table); +} +#endif + +/************************************************************************* +Mark all prebuilts using the table obsolete. These prebuilts are +rebuilt later. */ + +void +row_merge_mark_prebuilt_obsolete( +/*=============================*/ + + trx_t* trx, /* in: trx */ + dict_table_t* table) /* in: table */ +{ + row_prebuilt_t* prebuilt; + + row_mysql_lock_data_dictionary(trx); + + prebuilt = UT_LIST_GET_FIRST(table->prebuilts); + + while (prebuilt) { + prebuilt->magic_n = ROW_PREBUILT_OBSOLETE; + prebuilt->magic_n2 = ROW_PREBUILT_OBSOLETE; + + prebuilt = UT_LIST_GET_NEXT(prebuilts, prebuilt); + } + + /* This table will be dropped when there are no more references + to it */ + table->to_be_dropped = 1; + + row_mysql_unlock_data_dictionary(trx); +} + +/************************************************************************* +Create a temporary table using a definition of the old table. You must +lock data dictionary before calling this function. */ + +dict_table_t* +row_merge_create_temporary_table( +/*=============================*/ + /* out: new temporary table + definition */ + const char* table_name, /* in: new table name */ + dict_table_t* table, /* in: old table definition */ + trx_t* trx, /* in: trx */ + ulint* error) /* in:out/ error code or DB_SUCCESS */ +{ + ulint i; + dict_table_t* new_table = NULL; + ulint n_cols = dict_table_get_n_user_cols(table); + + ut_ad(table_name && table && error); + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&dict_sys->mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + *error = row_undo_report_create_table_dict_operation(trx, table_name); + + if (*error == DB_SUCCESS) { + + mem_heap_t* heap = mem_heap_create(1000); + log_buffer_flush_to_disk(); + + new_table = dict_mem_table_create( + table_name, 0, n_cols, table->flags); + + for (i = 0; i < n_cols; i++) { + const dict_col_t* col; + + col = dict_table_get_nth_col(table, i); + + dict_mem_table_add_col( + new_table, heap, + dict_table_get_col_name(table, i), + col->mtype, col->prtype, col->len); + } + + mem_heap_free(heap); + *error = row_create_table_for_mysql(new_table, trx); + } + + return(new_table); +} + +/************************************************************************* +Rename the indexes in the dicitionary. 
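+Only an index whose name still begins with TEMP_TABLE_PREFIX is renamed;
+the prefix byte is dropped from the in-memory name once the UPDATE on
+SYS_INDEXES succeeds.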
*/ + +ulint +row_merge_rename_index( +/*===================*/ + trx_t* trx, /* in: Transaction */ + dict_table_t* table, /* in: Table for index */ + dict_index_t* index) /* in: Index to rename */ +{ + que_thr_t* thr; + char* sql; + que_t* graph; + ulint name_len; + mem_heap_t* sql_heap; + ibool dict_lock = FALSE; + ulint err = DB_SUCCESS; + + /* Only rename from temp names */ + ut_a(*index->name == TEMP_TABLE_PREFIX); + + /* We use the private SQL parser of Innobase to generate the + query graphs needed in renaming index. */ + + static const char str1[] = + "PROCEDURE RENAME_INDEX_PROC () IS\n" + "indexid CHAR;\n" + "tableid CHAR;\n" + "table_id_high INT;\n" + "table_id_low INT;\n" + "index_id_high INT;\n" + "index_id_low INT;\n" + "BEGIN\n" + "index_id_high := %lu;\n" + "index_id_low := %lu;\n" + "table_id_high := %lu;\n" + "table_id_low := %lu;\n" + "indexid := CONCAT(TO_BINARY(index_id_high, 4)," + " TO_BINARY(index_id_low, 4));\n" + "tableid := CONCAT(TO_BINARY(table_id_high, 4)," + " TO_BINARY(table_id_low, 4));\n" + "UPDATE SYS_INDEXES SET NAME = '%s'\n" + " WHERE ID = indexid AND TABLE_ID = tableid;\n" + "END;\n"; + + table = index->table; + + ut_ad(index && table && trx); + + trx_start_if_not_started(trx); + trx->op_info = "renaming index"; + + sql_heap = mem_heap_create(1024); + + name_len = strlen(index->name); + sql = mem_heap_alloc(sql_heap, sizeof(str1) + 4 * 15 + name_len); + + sprintf(sql, str1, + ut_dulint_get_high(index->id), ut_dulint_get_low(index->id), + ut_dulint_get_high(table->id), ut_dulint_get_low(table->id), + index->name + 1); /* Skip the TEMP_TABLE_PREFIX marker */ + + if (trx->dict_operation_lock_mode == 0) { + row_mysql_lock_data_dictionary(trx); + dict_lock = TRUE; + } + + graph = pars_sql(NULL, sql); + + ut_a(graph); + mem_heap_free(sql_heap); + + graph->trx = trx; + trx->graph = NULL; + + graph->fork_type = QUE_FORK_MYSQL_INTERFACE; + + ut_a(thr = que_fork_start_command(graph)); + + que_run_threads(thr); + + err = trx->error_state; + + if (err == DB_SUCCESS) { + strcpy(index->name, index->name + 1); + } + + que_graph_free(graph); + + if (dict_lock) { + row_mysql_unlock_data_dictionary(trx); + } + + trx->op_info = ""; + + return(err); +} + +/************************************************************************* +Create the index and load in to the dicitionary. */ + +ulint +row_merge_create_index( +/*===================*/ + trx_t* trx, /* in: transaction */ + dict_index_t** index, /* out: the instance of the index */ + dict_table_t* table, /* in: the index is on this table */ + const merge_index_def_t* /* in: the index definition */ + index_def) +{ + ulint err = DB_SUCCESS; + ulint n_fields = index_def->n_fields; + + /* Create the index prototype, using the passed in def, this is not + a persistent operation. We pass 0 as the space id, and determine at + a lower level the space id where to store the table.*/ + + *index = dict_mem_index_create( + table->name, index_def->name, 0, index_def->ind_type, n_fields); + + ut_a(*index); + + /* Create the index id, as it will be required when we build + the index. 
We assign the id here because we want to write an + UNDO record before we insert the entry into SYS_INDEXES.*/ + ut_a(ut_dulint_is_zero((*index)->id)); + + (*index)->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + (*index)->table = table; + + /* Write the UNDO record for the create index */ + err = row_undo_report_create_index_dict_operation(trx, *index); + + if (err == DB_SUCCESS) { + ulint i; + + /* Make sure the UNDO record gets to disk */ + log_buffer_flush_to_disk(); + + for (i = 0; i < n_fields; i++) { + merge_index_field_t* ifield; + + ifield = index_def->fields[i]; + + /* TODO: [What's this comment] We assume all fields + should be sorted in ascending order, hence the '0' */ + + dict_mem_index_add_field( + *index, ifield->field_name, ifield->prefix_len); + } + + /* Add the index to SYS_INDEXES, this will use the prototype + to create an entry in SYS_INDEXES.*/ + err = row_create_index_graph_for_mysql(trx, table, *index); + + if (err == DB_SUCCESS) { + + *index = row_merge_dict_table_get_index( + table, index_def); + + ut_a(*index); + + /* Note the id of the transaction that created this + index, we use it to restrict readers from accessing + this index, to ensure read consistency.*/ + (*index)->trx_id = trx->id; + + /* Create element and append to list in trx. So that + we can rename from temp name to real name.*/ + if (trx->dict_redo_list) { + dict_redo_t* dict_redo; + + dict_redo = dict_redo_create_element(trx); + dict_redo->index = *index; + } + } + } + + return(err); +} + +/************************************************************************* +Check if a transaction can use an index.*/ + +ibool +row_merge_is_index_usable( +/*======================*/ + const trx_t* trx, /* in: transaction */ + const dict_index_t* /* in: index to check */ + index) +{ + if (!trx->read_view) { + return(TRUE); + } + + return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0); +} + +/************************************************************************* +Drop the old table.*/ + +ulint +row_merge_drop_table( +/*=================*/ + /* out: DB_SUCCESS if all OK else + error code.*/ + trx_t* trx, /* in: transaction */ + dict_table_t* table) /* in: table to drop */ +{ + ulint err = DB_SUCCESS; + ibool dict_locked = FALSE; + + if (trx->dict_operation_lock_mode == 0) { + row_mysql_lock_data_dictionary(trx); + dict_locked = TRUE; + } + + ut_a(table->to_be_dropped); + ut_a(*table->name == TEMP_TABLE_PREFIX); + + /* Drop the table immediately iff it is not references by MySQL */ + if (table->n_mysql_handles_opened == 0) { + /* Set the commit flag to FALSE.*/ + err = row_drop_table_for_mysql(table->name, trx, FALSE); + } + + if (dict_locked) { + row_mysql_unlock_data_dictionary(trx); + } + + return(err); +} + diff --git a/row/row0mysql.c b/row/row0mysql.c index e50ee334bd5..3aefa259bef 100644 --- a/row/row0mysql.c +++ b/row/row0mysql.c @@ -25,12 +25,15 @@ Created 9/17/2000 Heikki Tuuri #include "dict0boot.h" #include "trx0roll.h" #include "trx0purge.h" +#include "trx0rec.h" +#include "trx0undo.h" #include "lock0lock.h" #include "rem0cmp.h" #include "log0log.h" #include "btr0sea.h" #include "fil0fil.h" #include "ibuf0ibuf.h" +#include "row0merge.h" /* A dummy variable used to fool the compiler */ ibool row_mysql_identically_false = FALSE; @@ -79,6 +82,9 @@ row_mysql_is_system_table( } #endif /* !UNIV_HOTBACKUP */ +static ibool row_add_table_to_background_drop_list(dict_table_t* table); +/*====================================================================*/ + 
/*********************************************************************** Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */ static @@ -548,7 +554,6 @@ handle_new_error: "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" "forcing-recovery.html" " for help.\n", stderr); - } else { fprintf(stderr, "InnoDB: unknown error code %lu\n", (ulong) err); @@ -656,9 +661,39 @@ row_create_prebuilt( prebuilt->old_vers_heap = NULL; + UT_LIST_ADD_LAST(prebuilts, table->prebuilts, prebuilt); + return(prebuilt); } +/************************************************************************ +Update a prebuilt struct for a MySQL table handle. */ + +void +row_update_prebuilt( +/*================*/ + row_prebuilt_t* prebuilt, /* in: Innobase table handle */ + dict_table_t* table) /* in: table */ +{ + dict_index_t* clust_index; + + ut_ad(prebuilt && prebuilt->heap && table); + ut_ad(prebuilt->magic_n == ROW_PREBUILT_OBSOLETE); + + prebuilt->magic_n = ROW_PREBUILT_ALLOCATED; + prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED; + + clust_index = dict_table_get_first_index(table); + + if (!prebuilt->index) { + prebuilt->index = clust_index; + } + + if (prebuilt->ins_node) { + ins_node_create_entry_list(prebuilt->ins_node); + } +} + /************************************************************************ Free a prebuilt struct for a MySQL table handle. */ @@ -669,12 +704,15 @@ row_prebuilt_free( { ulint i; - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED - || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED) { + if ((prebuilt->magic_n != ROW_PREBUILT_ALLOCATED + && prebuilt->magic_n != ROW_PREBUILT_OBSOLETE) + || (prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED + && prebuilt->magic_n != ROW_PREBUILT_OBSOLETE)) { + fprintf(stderr, "InnoDB: Error: trying to free a corrupt\n" "InnoDB: table handle. Magic n %lu," - " magic n2 %lu, table name", + " magic n2 %lu, table name ", (ulong) prebuilt->magic_n, (ulong) prebuilt->magic_n2); ut_print_name(stderr, NULL, TRUE, prebuilt->table->name); @@ -738,6 +776,35 @@ row_prebuilt_free( dict_table_decrement_handle_count(prebuilt->table); + /* If there were references to this table when a primary index on + this table was created then we drop it here since there are no + references to it now.*/ + if (prebuilt->table->to_be_dropped + && prebuilt->table->n_mysql_handles_opened == 0) { + ibool added; + + added = row_add_table_to_background_drop_list(prebuilt->table); + + assert(*prebuilt->table->name == TEMP_TABLE_PREFIX); + + ut_print_timestamp(stderr); + + if (added) { + fputs(" InnoDB: Dropping table ", stderr); + ut_print_name(stderr, NULL, TRUE, + prebuilt->table->name); + putc('\n', stderr); + } else { + fputs(" InnoDB: Error: failed trying to add ", + stderr); + ut_print_name(stderr, NULL, TRUE, + prebuilt->table->name); + fputs(" to the background drop list.\n", stderr); + } + } + + UT_LIST_REMOVE(prebuilts, prebuilt->table->prebuilts, prebuilt); + mem_heap_free(prebuilt->heap); } @@ -767,9 +834,9 @@ row_update_prebuilt_trx( if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { fprintf(stderr, "InnoDB: Error: trying to use a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name", + "InnoDB: table handle. 
Magic n %lu, table name ", (ulong) prebuilt->magic_n); - ut_print_name(stderr, NULL, TRUE, prebuilt->table->name); + ut_print_name(stderr, trx, TRUE, prebuilt->table->name); putc('\n', stderr); mem_analyze_corruption(prebuilt); @@ -1048,6 +1115,88 @@ run_again: return((int) err); } +/************************************************************************* +Sets a table lock on the table */ + +int +row_lock_table_for_merge( +/*=====================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: lock table for this trx */ + dict_table_t* table, /* in: table to lock */ + ulint mode) /* in: lock mode of table */ +{ + mem_heap_t* heap; /* Memory heap */ + que_thr_t* thr; + ulint err; + sel_node_t* node; + + ut_ad(trx); + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + heap = mem_heap_create(512); + + trx->op_info = "setting table lock for index merge"; + + node = sel_node_create(heap); + thr = pars_complete_graph_for_exec(node, trx, heap); + /* SB: Not sure about this - Ask Heikki */ + thr->graph->state = QUE_FORK_ACTIVE; + + /* We use the select query graph as the dummy graph needed + in the lock module call */ + + thr = que_fork_get_first_thr(que_node_get_parent(thr)); + que_thr_move_to_run_state_for_mysql(thr, trx); + +run_again: + thr->run_node = thr; + thr->prev_node = thr->common.parent; + + err = lock_table(0, table, mode, thr); + + trx->error_state = err; + + if (err != DB_SUCCESS) { + que_thr_stop_for_mysql(thr); + + if (err != DB_QUE_THR_SUSPENDED) { + ibool was_lock_wait; + + was_lock_wait = row_mysql_handle_errors( + &err, trx, thr, NULL); + + if (was_lock_wait) { + goto run_again; + } + + } else { + que_thr_t* run_thr; + que_node_t* parent; + + parent = que_node_get_parent(thr); + run_thr = que_fork_start_command(parent); + + ut_a(run_thr == thr); + + /* There was a lock wait but the thread was not + in a ready to run or running state.*/ + trx->error_state = DB_LOCK_WAIT; + + goto run_again; + } + + trx->op_info = ""; + + return((int) err); + } + + que_thr_stop_for_mysql_no_error(thr, trx); + + trx->op_info = ""; + + return((int) err); +} /************************************************************************* Does an insert for MySQL. */ @@ -1065,6 +1214,7 @@ row_insert_for_mysql( ibool was_lock_wait; trx_t* trx = prebuilt->trx; ins_node_t* node = prebuilt->ins_node; + dict_table_t* table; ut_ad(trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); @@ -1087,13 +1237,13 @@ row_insert_for_mysql( return(DB_ERROR); } - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { + if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED + && prebuilt->magic_n != ROW_PREBUILT_OBSOLETE) { fprintf(stderr, "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name", + "InnoDB: table handle. 
Magic n %lu, table name ", (ulong) prebuilt->magic_n); - ut_print_name(stderr, prebuilt->trx, TRUE, - prebuilt->table->name); + ut_print_name(stderr, trx, TRUE, prebuilt->table->name); putc('\n', stderr); mem_analyze_corruption(prebuilt); @@ -1101,6 +1251,10 @@ row_insert_for_mysql( ut_error; } + if (prebuilt->magic_n == ROW_PREBUILT_OBSOLETE) { + row_update_prebuilt(prebuilt, prebuilt->table); + } + if (srv_created_new_raw || srv_force_recovery) { fputs("InnoDB: A new raw disk partition was initialized or\n" "InnoDB: innodb_force_recovery is on: we do not allow\n" @@ -1122,6 +1276,14 @@ row_insert_for_mysql( if (node == NULL) { row_get_prebuilt_insert_row(prebuilt); node = prebuilt->ins_node; + } else { + table = dict_table_get(prebuilt->table->name, FALSE); + + if (prebuilt->ins_node->table_version_number != + table->version_number) { + row_get_prebuilt_insert_row(prebuilt); + node = prebuilt->ins_node; + } } row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec); @@ -1143,6 +1305,10 @@ run_again: thr->run_node = node; thr->prev_node = node; + if (prebuilt->magic_n == ROW_PREBUILT_OBSOLETE) { + row_update_prebuilt(prebuilt, prebuilt->table); + } + row_ins_step(thr); err = trx->error_state; @@ -1325,13 +1491,13 @@ row_update_for_mysql( return(DB_ERROR); } - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { + if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED + && prebuilt->magic_n != ROW_PREBUILT_OBSOLETE) { fprintf(stderr, "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name", + "InnoDB: table handle. Magic n %lu, table name ", (ulong) prebuilt->magic_n); - ut_print_name(stderr, prebuilt->trx, TRUE, - prebuilt->table->name); + ut_print_name(stderr, trx, TRUE, prebuilt->table->name); putc('\n', stderr); mem_analyze_corruption(prebuilt); @@ -1339,6 +1505,10 @@ row_update_for_mysql( ut_error; } + if (prebuilt->magic_n == ROW_PREBUILT_OBSOLETE) { + row_update_prebuilt(prebuilt, prebuilt->table); + } + if (srv_created_new_raw || srv_force_recovery) { fputs("InnoDB: A new raw disk partition was initialized or\n" "InnoDB: innodb_force_recovery is on: we do not allow\n" @@ -1391,6 +1561,10 @@ run_again: thr->run_node = node; thr->prev_node = node; + if (prebuilt->magic_n == ROW_PREBUILT_OBSOLETE) { + row_update_prebuilt(prebuilt, prebuilt->table); + } + row_upd_step(thr); err = trx->error_state; @@ -1749,6 +1923,9 @@ row_create_table_for_mysql( ulint table_name_len; ulint err; ulint i; + ibool retry; + + retry = FALSE; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); #ifdef UNIV_SYNC_DEBUG @@ -1868,6 +2045,7 @@ row_create_table_for_mysql( heap = mem_heap_create(512); +retry_create: trx->dict_operation = TRUE; node = tab_create_graph_create(table, heap); @@ -1884,9 +2062,10 @@ row_create_table_for_mysql( trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); if (err == DB_OUT_OF_FILE_SPACE) { + trx_general_rollback_for_mysql(trx, FALSE, NULL); + ut_print_timestamp(stderr); fputs(" InnoDB: Warning: cannot create table ", @@ -1903,30 +2082,51 @@ row_create_table_for_mysql( } else if (err == DB_DUPLICATE_KEY) { ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" already exists in InnoDB internal\n" - "InnoDB: data dictionary. Have you deleted" - " the .frm file\n" - "InnoDB: and not used DROP TABLE?" 
- " Have you used DROP DATABASE\n" - "InnoDB: for InnoDB tables in" - " MySQL version <= 3.23.43?\n" - "InnoDB: See the Restrictions section" - " of the InnoDB manual.\n" - "InnoDB: You can drop the orphaned table" - " inside InnoDB by\n" - "InnoDB: creating an InnoDB table with" - " the same name in another\n" - "InnoDB: database and copying the .frm file" - " to the current database.\n" - "InnoDB: Then MySQL thinks the table exists," - " and DROP TABLE will\n" - "InnoDB: succeed.\n" - "InnoDB: You can look for further help from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n", - stderr); + if (*table->name != TEMP_TABLE_PREFIX) { + trx_general_rollback_for_mysql( + trx, FALSE, NULL); + + fputs(" InnoDB: Error: table ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs(" already exists in InnoDB internal\n" + "InnoDB: data dictionary. Have you deleted" + " the .frm file\n" + "InnoDB: and not used DROP TABLE?" + " Have you used DROP DATABASE\n" + "InnoDB: for InnoDB tables in" + " MySQL version <= 3.23.43?\n" + "InnoDB: See the Restrictions section" + " of the InnoDB manual.\n" + "InnoDB: You can drop the orphaned table" + " inside InnoDB by\n" + "InnoDB: creating an InnoDB table with" + " the same name in another\n" + "InnoDB: database and copying the .frm file" + " to the current database.\n" + "InnoDB: Then MySQL thinks the table exists," + " and DROP TABLE will\n" + "InnoDB: succeed.\n" + "InnoDB: You can look for further help from\n" + "InnoDB: " + "http://dev.mysql.com/doc/refman/5.1/en/" + "innodb-troubleshooting.html\n", + stderr); + } else if (!retry) { + fputs(" InnoDB: Warning: table ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs(" already exists in InnoDB internal\n" + "InnoDB: dropping old temporary table\n", + stderr); + + row_drop_table_for_mysql(table->name, trx, + FALSE); + + retry = TRUE; + goto retry_create; + } else { + trx_general_rollback_for_mysql( + trx, FALSE, NULL); + } } /* We may also get err == DB_ERROR if the .ibd file for the @@ -1964,7 +2164,7 @@ row_create_index_for_mysql( mem_heap_t* heap; que_thr_t* thr; ulint err; - ulint i, j; + ulint i; ulint len; #ifdef UNIV_SYNC_DEBUG @@ -1982,11 +2182,12 @@ row_create_index_for_mysql( safer not to allow them. 
*/ for (i = 0; i < dict_index_get_n_fields(index); i++) { + ulint j; + for (j = 0; j < i; j++) { if (0 == ut_strcmp( dict_index_get_nth_field(index, j)->name, dict_index_get_nth_field(index, i)->name)) { - ut_print_timestamp(stderr); fputs(" InnoDB: Error: column ", stderr); @@ -2433,10 +2634,10 @@ row_discard_tablespace_for_mysql( ut_print_timestamp(ef); fputs(" Cannot DISCARD table ", ef); - ut_print_name(ef, trx, TRUE, name); + ut_print_name(stderr, trx, TRUE, name); fputs("\n" "because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); + ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name); putc('\n', ef); mutex_exit(&dict_foreign_err_mutex); @@ -2761,10 +2962,10 @@ row_truncate_table_for_mysql( ut_print_timestamp(ef); fputs(" Cannot truncate table ", ef); - ut_print_name(ef, trx, TRUE, table->name); + ut_print_name(stderr, trx, TRUE, table->name); fputs(" by DROP+CREATE\n" "InnoDB: because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); + ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name); putc('\n', ef); mutex_exit(&dict_foreign_err_mutex); @@ -2982,6 +3183,30 @@ row_drop_table_for_mysql( const char* name, /* in: table name */ trx_t* trx, /* in: transaction handle */ ibool drop_db)/* in: TRUE=dropping whole database */ +{ + ulint err; + + err = row_drop_table_for_mysql_no_commit(name, trx, drop_db); + + if (!srv_created_new_raw) { + trx_commit_for_mysql(trx); + } + + return err; +} + +/************************************************************************* +Drops a table for MySQL. If the name of the table to be dropped is equal +with one of the predefined magic table names, then this also stops printing +the corresponding monitor output by the master thread. */ + +int +row_drop_table_for_mysql_no_commit( +/*===============================*/ + /* out: error code or DB_SUCCESS */ + const char* name, /* in: table name */ + trx_t* trx, /* in: transaction handle */ + ibool drop_db)/* in: TRUE=dropping whole database */ { dict_foreign_t* foreign; dict_table_t* table; @@ -3111,10 +3336,10 @@ check_next_foreign: ut_print_timestamp(ef); fputs(" Cannot drop table ", ef); - ut_print_name(ef, trx, TRUE, name); + ut_print_name(stderr, trx, TRUE, name); fputs("\n" "because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); + ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name); putc('\n', ef); mutex_exit(&dict_foreign_err_mutex); @@ -3131,20 +3356,25 @@ check_next_foreign: added = row_add_table_to_background_drop_list(table); if (added) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: MySQL is" - " trying to drop table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: though there are still" - " open handles to it.\n" - "InnoDB: Adding the table to the" - " background drop queue.\n", - stderr); + /* Temporary tables can have read views and we don't + print any warning. 
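+		A table whose name still begins with TEMP_TABLE_PREFIX is
+		typically an intermediate table left over from an index
+		build; it is queued for the background drop silently.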
*/ + if (*table->name != TEMP_TABLE_PREFIX) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Warning: MySQL is" + " trying to drop table ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs("\n" + "InnoDB: though there are still" + " open handles to it.\n" + "InnoDB: Adding the table to the" + " background drop queue.\n", + stderr); + } else { + ut_a(table->to_be_dropped); + } /* We return DB_SUCCESS to MySQL though the drop will happen lazily later */ - err = DB_SUCCESS; } else { /* The table is already in the background drop list */ @@ -3219,7 +3449,6 @@ check_next_foreign: "WHERE NAME = :table_name\n" "LOCK IN SHARE MODE;\n" "IF (SQL % NOTFOUND) THEN\n" - " COMMIT WORK;\n" " RETURN;\n" "END IF;\n" "found := 1;\n" @@ -3272,7 +3501,6 @@ check_next_foreign: "WHERE TABLE_ID = table_id;\n" "DELETE FROM SYS_TABLES\n" "WHERE ID = table_id;\n" - "COMMIT WORK;\n" "END;\n" , FALSE, trx); @@ -3354,8 +3582,6 @@ check_next_foreign: } funct_exit: - trx_commit_for_mysql(trx); - if (locked_dictionary) { row_mysql_unlock_data_dictionary(trx); } @@ -3532,7 +3758,8 @@ row_rename_table_for_mysql( /* out: error code or DB_SUCCESS */ const char* old_name, /* in: old table name */ const char* new_name, /* in: new table name */ - trx_t* trx) /* in: transaction handle */ + trx_t* trx, /* in: transaction handle */ + ibool commit) /* in: if TRUE then commit trx */ { dict_table_t* table; ulint err; @@ -3557,9 +3784,7 @@ row_rename_table_for_mysql( trx_commit_for_mysql(trx); return(DB_ERROR); - } - - if (row_mysql_is_system_table(new_name)) { + } else if (row_mysql_is_system_table(new_name)) { fprintf(stderr, "InnoDB: Error: trying to create a MySQL" @@ -3578,11 +3803,6 @@ row_rename_table_for_mysql( old_is_tmp = row_is_mysql_tmp_table_name(old_name); new_is_tmp = row_is_mysql_tmp_table_name(new_name); - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - row_mysql_lock_data_dictionary(trx); - table = dict_table_get_low(old_name); if (!table) { @@ -3603,9 +3823,7 @@ row_rename_table_for_mysql( "innodb-troubleshooting.html\n", stderr); goto funct_exit; - } - - if (table->ibd_file_missing) { + } else if (table->ibd_file_missing) { err = DB_TABLE_NOT_FOUND; ut_print_timestamp(stderr); @@ -3618,9 +3836,7 @@ row_rename_table_for_mysql( "innodb-troubleshooting.html\n", stderr); goto funct_exit; - } - - if (new_is_tmp) { + } else if (new_is_tmp) { /* MySQL is doing an ALTER TABLE command and it renames the original table to a temporary table name. We want to preserve the original foreign key constraint definitions despite the @@ -3660,9 +3876,7 @@ row_rename_table_for_mysql( if (err != DB_SUCCESS) { goto end; - } - - if (!new_is_tmp) { + } else if (!new_is_tmp) { /* Rename all constraints. */ info = pars_info_create(); @@ -3860,8 +4074,10 @@ end: } funct_exit: - trx_commit_for_mysql(trx); - row_mysql_unlock_data_dictionary(trx); + + if (commit) { + trx_commit_for_mysql(trx); + } if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); @@ -4066,8 +4282,7 @@ row_check_table_for_mysql( if (!btr_validate_index(index, prebuilt->trx)) { ret = DB_ERROR; } else { - if (!row_scan_and_check_index(prebuilt, - index, &n_rows)) { + if (!row_scan_and_check_index(prebuilt,index, &n_rows)){ ret = DB_ERROR; } @@ -4118,4 +4333,273 @@ row_check_table_for_mysql( return(ret); } + +/*************************************************************************** +Writes information to an undo log about dictionary operation e.g. 
+rename_table, create_table, create_index, drop table. This information +is used in a rollback of the transaction. */ +static +ulint +row_undo_report_dict_operation( +/*===========================*/ + /* out: DB_SUCCESS or error code */ + ulint op_type, /* in: TRX_UNDO_TABLE_CREATE_OP, + TRX_UNDO_TABLE_RENAME_OP, + TRX_UNDO_TABLE_DROP_OP, or + TRX_UNDO_INDEX_CREATE_OP */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: + if TRX_UNDO_INDEX_CREATE_OP + index to be created*/ + const char* table_name, /* in: table name or NULL, used in + create table, rename table and + drop table*/ + const char* old_table_name, /* in: old table name or NULL. */ + const char* tmp_table_name) /* in: the intermediate name or NULL */ +{ + dulint roll_ptr; + + return trx_undo_report_dict_operation( + op_type, trx, index, table_name, old_table_name, + tmp_table_name, &roll_ptr); +} + +/*************************************************************************** +Writes information to an undo log about dictionary operation, create_table. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_create_table_dict_operation( +/*========================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + const char* table_name) /* in: table name to create. */ +{ + return row_undo_report_dict_operation( + TRX_UNDO_TABLE_CREATE_OP, trx, NULL, table_name, NULL, NULL); +} + +/*************************************************************************** +Writes information to an undo log about dictionary operation, create_index. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_create_index_dict_operation( +/*========================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + dict_index_t* index) /* in: index created. */ +{ + return row_undo_report_dict_operation( + TRX_UNDO_INDEX_CREATE_OP, trx, index, NULL, NULL, NULL); +} + +/*************************************************************************** +Writes information to an undo log about dictionary operation, rename_table. +This information is used in a rollback of the transaction. */ + +ulint +row_undo_report_rename_table_dict_operation( +/*========================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + const char* from_table_name,/* in: rename from table table. */ + const char* to_table_name, /* in: rename to table name. */ + const char* tmp_table_name) /* in: intermediate name for table */ +{ + return row_undo_report_dict_operation( + TRX_UNDO_TABLE_RENAME_OP, trx, NULL, + to_table_name, from_table_name, tmp_table_name); +} + +/*************************************************************************** +Writes information to an undo log about dictionary operation, drop table. +This information is used in a rollback of the transaction. 
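+Like the wrappers above, it goes through row_undo_report_dict_operation(),
+which forwards to trx_undo_report_dict_operation() and discards the
+returned roll pointer.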
*/ + +ulint +row_undo_report_drop_table_dict_operation( +/*======================================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: query thread */ + const char* table_name) /* in: table name dropped */ +{ + return row_undo_report_dict_operation( + TRX_UNDO_TABLE_DROP_OP, trx, NULL, table_name, NULL, NULL); +} + +/************************************************************************* +Create query graph for an index creation */ + +ulint +row_create_index_graph_for_mysql( +/*=============================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: trx */ + dict_table_t* table, /* in: table */ + dict_index_t* index) /* in: index */ +{ + ind_node_t* node; /* Index creation node */ + mem_heap_t* heap; /* Memory heap */ + que_thr_t* thr; /* Query thread */ + ulint err = DB_SUCCESS; + + ut_ad(trx && index); + + heap = mem_heap_create(512); + + index->table = table; + node = ind_create_graph_create(index, heap); + thr = pars_complete_graph_for_exec(node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); + + que_run_threads(thr); + + err = trx->error_state; + + que_graph_free((que_t*) que_node_get_parent(thr)); + + return(err); +} + +/************************************************************************* +Build new indexes to a table by reading a cluster index, +creating a temporary file containing index entries, merge sorting +these index entries and inserting sorted index entries to indexes. */ + +ulint +row_build_index_for_mysql( +/*======================*/ + /* out: 0 or error code */ + trx_t* trx, /* in: transaction */ + dict_table_t* old_table, /* in: Table where rows are + read from */ + dict_table_t* new_table, /* in: Table where indexes are + created. Note that old_table == + new_table if we are creating a + secondary keys. */ + dict_index_t** index, /* in: Indexes to be created */ + ibool new_primary, /* in: new primary key + i.e. clustered index will be build + for this table */ + ulint num_of_keys) /* in: Number of indexes to be + created */ +{ + merge_file_t** merge_files; + mem_heap_t* file_heap; + ulint index_num; + ulint error = DB_SUCCESS; + + ut_ad(trx && old_table && new_table && index && num_of_keys); + + trx_start_if_not_started(trx); + + /* Allocate memory for merge file data structure and initialize + fields */ + + file_heap = mem_heap_create( + num_of_keys * (sizeof(merge_file_t) + sizeof(merge_file_t*))); + + merge_files = mem_heap_alloc( + file_heap, num_of_keys * sizeof(merge_file_t*)); + + for (index_num = 0; index_num < num_of_keys; index_num++) { + + merge_files[index_num] = + row_merge_create_file_structure(file_heap); + } + + + /* Read clustered index of the table and create files for + secondary index entries for merge sort */ + + error = row_merge_read_clustered_index( + trx, old_table, index, merge_files, num_of_keys); + + if (error != DB_SUCCESS) { + + return(error); + } + + trx_start_if_not_started(trx); + + /* Now we have files containing index entries ready for + sorting and inserting. */ + + for (index_num = 0; index_num < num_of_keys; index_num++) { + + /* Do a merge sort and insert from those files + which we have written at least one block */ + + if (merge_files[index_num]->num_of_blocks > 0) { + dulint offset = ut_dulint_create(0, 0); + + /* Merge sort file using linked list merge + sort for files. 
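+			On a duplicate key the sort returns ut_dulint_max
+			and sets *error to DB_DUPLICATE_KEY; the check
+			below records the failing key in trx->error_key_num.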
*/ + + offset = row_merge_sort_linked_list_in_disk( + index[index_num], + merge_files[index_num]->file, + (int *)&error); + + if (error != DB_SUCCESS) { + trx->error_key_num = index_num; + goto func_exit; + } + + /* Insert sorted index entries to the table. */ + + error = row_merge_insert_index_tuples( + trx, + index[index_num], + new_table, + merge_files[index_num]->file, + ut_dulint_create(0,0)); + + if (error != DB_SUCCESS) { + trx->error_key_num = index_num; + goto func_exit; + } + } + } + +func_exit: + if (error == DB_SUCCESS && new_primary) { + row_merge_mark_prebuilt_obsolete(trx, old_table); + } + + mem_heap_free(file_heap); + + return(error); +} + +/************************************************************************* +Remove those indexes which were created before a error happened in +the index build */ + +ulint +row_remove_indexes_for_mysql( +/*=========================*/ + /* out: 0 or error code */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: Table where index is created */ + dict_index_t** index, /* in: Indexes to be created */ + ulint num_created) /* in: Number of indexes created + before error and now must be removed */ +{ + ulint key_num; + ulint error = DB_SUCCESS; + + ut_ad(trx && table && index); + + for (key_num = 0; key_num < num_created; key_num++) { + error = row_merge_remove_index(index[key_num], table, trx); + + if (error != DB_SUCCESS) { + break; + } + } + + return(error); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/row/row0purge.c b/row/row0purge.c index 74cf0999989..aa2f42ddfb1 100644 --- a/row/row0purge.c +++ b/row/row0purge.c @@ -227,7 +227,7 @@ row_purge_remove_sec_if_poss_low( /* Not found */ /* fputs("PURGE:........sec entry not found\n", stderr); */ - /* dtuple_print(entry); */ + /* dtuple_print(stderr, entry); */ btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/row/row0row.c b/row/row0row.c index 58b4fc9f5c9..6ede1d71e47 100644 --- a/row/row0row.c +++ b/row/row0row.c @@ -57,15 +57,16 @@ row_get_trx_id_offset( } /********************************************************************* -When an insert to a table is performed, this function builds the entry which -has to be inserted to an index on the table. */ +When an insert or purge to a table is performed, this function builds +the entry to be inserted into or purged from an index on the table. */ dtuple_t* row_build_index_entry( /*==================*/ - /* out: index entry which should be inserted */ - const dtuple_t* row, /* in: row which should be inserted to the - table */ + /* out: index entry which should be + inserted or purged */ + const dtuple_t* row, /* in: row which should be + inserted or purged */ row_ext_t* ext, /* in: externally stored column prefixes, or NULL */ dict_index_t* index, /* in: index on the table */ diff --git a/row/row0uins.c b/row/row0uins.c index d8c24b16050..28d317b3551 100644 --- a/row/row0uins.c +++ b/row/row0uins.c @@ -28,6 +28,7 @@ Created 2/25/1997 Heikki Tuuri #include "que0que.h" #include "ibuf0ibuf.h" #include "log0log.h" +#include "row0merge.h" /******************************************************************* Removes a clustered index record. 
The pcur in node was positioned on the @@ -52,6 +53,25 @@ row_undo_ins_remove_clust_rec( ut_a(success); if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) { + trx_t* trx; + ibool thawed_dictionary = FALSE; + ibool locked_dictionary = FALSE; + + trx = node->trx; + + if (trx->dict_operation_lock_mode == RW_S_LATCH) { + row_mysql_unfreeze_data_dictionary(trx); + + thawed_dictionary = TRUE; + } + + if (trx->dict_operation_lock_mode == 0 + || trx->dict_operation_lock_mode != RW_X_LATCH) { + + row_mysql_lock_data_dictionary(trx); + + locked_dictionary = TRUE; + } /* Drop the index tree associated with the row in SYS_INDEXES table: */ @@ -65,6 +85,14 @@ row_undo_ins_remove_clust_rec( success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur), &mtr); ut_a(success); + + if (locked_dictionary) { + row_mysql_unlock_data_dictionary(trx); + } + + if (thawed_dictionary) { + row_mysql_freeze_data_dictionary(trx); + } } btr_cur = btr_pcur_get_btr_cur(&(node->pcur)); @@ -211,6 +239,36 @@ retry: return(err); } +/*************************************************************** +Parses the rec_type undo record. */ + +byte* +row_undo_ins_parse_rec_type_and_table_id( +/*=====================================*/ + /* out: ptr to next field to parse */ + undo_node_t* node, /* in: row undo node */ + dulint* table_id) /* out: table id */ +{ + byte* ptr; + dulint undo_no; + ulint type; + ulint dummy; + ibool dummy_extern; + + ut_ad(node && node->trx); + + ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, + &dummy_extern, &undo_no, table_id); + + node->rec_type = type; + + if (node->rec_type == TRX_UNDO_DICTIONARY_REC) { + node->trx->dict_operation = TRUE; + } + + return ptr; +} + /*************************************************************** Parses the row reference and other info in a fresh insert undo record. 
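+For a TRX_UNDO_DICTIONARY_REC only the record type is noted here; the
+dictionary undo information itself is gathered later by
+row_undo_build_dict_undo_list().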
*/ static @@ -219,39 +277,67 @@ row_undo_ins_parse_undo_rec( /*========================*/ undo_node_t* node) /* in: row undo node */ { - dict_index_t* clust_index; byte* ptr; - dulint undo_no; dulint table_id; - ulint type; - ulint dummy; - ibool dummy_extern; ut_ad(node); - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, - &dummy_extern, &undo_no, &table_id); - ut_ad(type == TRX_UNDO_INSERT_REC); - node->rec_type = type; + ptr = row_undo_ins_parse_rec_type_and_table_id(node, &table_id); - node->table = dict_table_get_on_id(table_id, node->trx); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC + || node->rec_type == TRX_UNDO_DICTIONARY_REC); - if (node->table == NULL) { + if (node->rec_type == TRX_UNDO_INSERT_REC) { - return; + trx_t* trx; + ibool thawed_dictionary = FALSE; + ibool locked_dictionary = FALSE; + + trx = node->trx; + + /* If it's sytem table then we have to acquire the + dictionary lock in X mode.*/ + + if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0) { + if (trx->dict_operation_lock_mode == RW_S_LATCH) { + row_mysql_unfreeze_data_dictionary(trx); + + thawed_dictionary = TRUE; + } + + if (trx->dict_operation_lock_mode == 0 + || trx->dict_operation_lock_mode != RW_X_LATCH) { + + row_mysql_lock_data_dictionary(trx); + + locked_dictionary = TRUE; + } + } + + node->table = dict_table_get_on_id(table_id, trx); + + /* If we can't find the table or .ibd file is missing, + we skip the UNDO.*/ + if (node->table == NULL || node->table->ibd_file_missing) { + + node->table = NULL; + } else { + dict_index_t* clust_index; + + clust_index = dict_table_get_first_index(node->table); + + ptr = trx_undo_rec_get_row_ref( + ptr, clust_index, &node->ref, node->heap); + } + + if (locked_dictionary) { + row_mysql_unlock_data_dictionary(trx); + } + + if (thawed_dictionary) { + row_mysql_freeze_data_dictionary(trx); + } } - - if (node->table->ibd_file_missing) { - /* We skip undo operations to missing .ibd files */ - node->table = NULL; - - return; - } - - clust_index = dict_table_get_first_index(node->table); - - ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), - node->heap); } /*************************************************************** @@ -265,44 +351,49 @@ row_undo_ins( /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ undo_node_t* node) /* in: row undo node */ { - dtuple_t* entry; - ibool found; - ulint err; + ulint err = DB_SUCCESS; ut_ad(node); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(node); - if (node->table == NULL) { - found = FALSE; - } else { - found = row_undo_search_clust_to_pcur(node); - } + /* Dictionary records are undone in a separate function */ + + if (node->rec_type == TRX_UNDO_DICTIONARY_REC) { + + err = row_undo_build_dict_undo_list(node); + + } else if (!node->table || !row_undo_search_clust_to_pcur(node)) { - if (!found) { trx_undo_rec_release(node->trx, node->undo_no); - return(DB_SUCCESS); - } + } else { - node->index = dict_table_get_next_index( - dict_table_get_first_index(node->table)); + /* Iterate over all the indexes and undo the insert.*/ - while (node->index != NULL) { - entry = row_build_index_entry(node->row, node->ext, - node->index, node->heap); - err = row_undo_ins_remove_sec(node->index, entry); + /* Skip the clustered index (the first index) */ + node->index = dict_table_get_next_index( + dict_table_get_first_index(node->table)); - if (err != DB_SUCCESS) { + while (node->index != NULL) { + dtuple_t* entry; - return(err); + entry = row_build_index_entry(node->row, node->ext, + node->index, node->heap); + + err = 
row_undo_ins_remove_sec(node->index, entry); + + if (err != DB_SUCCESS) { + + return(err); + } + + node->index = dict_table_get_next_index(node->index); } - node->index = dict_table_get_next_index(node->index); + err = row_undo_ins_remove_clust_rec(node); } - err = row_undo_ins_remove_clust_rec(node); - return(err); } diff --git a/row/row0umod.c b/row/row0umod.c index 4360799692e..812cb5e9758 100644 --- a/row/row0umod.c +++ b/row/row0umod.c @@ -426,7 +426,15 @@ row_undo_mod_del_unmark_sec_and_undo_update( log_free_check(); mtr_start(&mtr); - found = row_search_index_entry(index, entry, mode, &pcur, &mtr); + /* Check if the index was created after this transaction was + started because then this index will not have the changes made + by this transaction.*/ + if (*index->name != TEMP_TABLE_PREFIX) { + found = row_search_index_entry(index, entry, mode, &pcur, &mtr); + } else { + + return(err); + } if (!found) { fputs("InnoDB: error in sec index entry del undo in\n" diff --git a/row/row0undo.c b/row/row0undo.c index 0077059745d..cedf198e90d 100644 --- a/row/row0undo.c +++ b/row/row0undo.c @@ -26,6 +26,7 @@ Created 1/8/1997 Heikki Tuuri #include "row0umod.h" #include "row0mysql.h" #include "srv0srv.h" +#include "row0merge.h" /* How to undo row operations? (1) For an insert, we have stored a prefix of the clustered index record @@ -124,6 +125,7 @@ row_undo_node_create( undo->state = UNDO_NODE_FETCH_NEXT; undo->trx = trx; + undo->rec_sub_type = TRX_UNDO_NULL_REC; btr_pcur_init(&(undo->pcur)); @@ -350,3 +352,352 @@ row_undo_step( return(thr); } + +/*************************************************************** +Parses the info in a fresh insert undo record containing a +dictionary change. */ +static +ulint +row_undo_dictionary_parse_undo_rec( +/*===============================*/ + /* out: DB_SUCCESS or DB_ERROR */ + undo_node_t* node) /* in: row undo node */ +{ + byte* ptr; + dulint table_id; + dulint index_id; + ulint len; + + ut_ad(node); + + node->rec_type = trx_undo_rec_get_type(node->undo_rec); + node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); + + ptr = trx_undo_rec_get_ptr(node->undo_rec, node->undo_no); + + ut_a(node->rec_type == TRX_UNDO_DICTIONARY_REC); + + /* Read dictionary rec sub type */ + node->rec_sub_type = mach_read_from_1(ptr); + ptr++; + + /* Parse subtype parameters */ + + switch (node->rec_sub_type) { + + case TRX_UNDO_INDEX_CREATE_REC: + + table_id = mach_dulint_read_much_compressed(ptr); + len = mach_dulint_get_much_compressed_size(table_id); + ptr += len; + + index_id = mach_dulint_read_much_compressed(ptr); + len = mach_dulint_get_much_compressed_size(index_id); + ptr += len; + + node->table = dict_table_get_on_id(table_id, node->trx); + node->index = NULL; + + if (!node->table) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Error]: Table %lu %lu not found " + "in index create undo rec\n", + (ulong) ut_dulint_get_high(table_id), + (ulong) ut_dulint_get_low(table_id)); + goto err_exit; + } else if (ut_dulint_is_zero(index_id)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Error]: Index id missing from " + "index create undo rec\n"); +err_exit: + mutex_enter(&kernel_mutex); + trx_print(stderr, node->trx, 1024); + mutex_exit(&kernel_mutex); + + return(DB_ERROR); + } else { + node->index = dict_index_get_on_id_low( + node->table, index_id); + } + + if (node->table->ibd_file_missing || !node->index) { + /* We skip undo operations to missing .ibd files + and missing indexes */ + node->table = NULL; + node->index = NULL; + + 
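+			/* There is nothing to undo for a missing table or
+			index; report success so that the rollback of the
+			rest of the transaction can continue. */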
return(DB_SUCCESS); + } + + break; + + case TRX_UNDO_TABLE_CREATE_REC: + case TRX_UNDO_TABLE_DROP_REC: + len = strlen((char *)ptr) + 1; + + node->new_table_name = mem_heap_strdup(node->heap, (char *)ptr); + ptr += len; + + ut_ad(*node->new_table_name == TEMP_TABLE_PREFIX); + break; + + case TRX_UNDO_TABLE_RENAME_REC: + len = strlen((char *)ptr) + 1; + + node->new_table_name = mem_heap_strdup(node->heap, (char *)ptr); + ptr += len; + + ut_ad(*node->new_table_name == TEMP_TABLE_PREFIX); + + len = strlen((char *)ptr) + 1; + + node->old_table_name = mem_heap_strdup(node->heap, (char *)ptr); + ptr += len; + + len = strlen((char *)ptr) + 1; + + node->tmp_table_name = mem_heap_strdup(node->heap, (char *)ptr); + ptr += len; + + ut_ad(*node->tmp_table_name == TEMP_TABLE_PREFIX); + break; + + default: + ut_print_timestamp(stderr); + + fprintf(stderr, + " InnoDB: [Error]: Undefined rec_sub_type = %lu at " + "row_undo_dictionary_parse_undo_rec\n", + (ulong)node->rec_sub_type); + + mutex_enter(&kernel_mutex); + trx_print(stderr, node->trx, 1024); + mutex_exit(&kernel_mutex); + + return(DB_ERROR); + } + + return(DB_SUCCESS); +} + +/*************************************************************** + Currently we gather all the information that is required to do the + UNDO. The actual UNDO is done later in row_undo_dictionary().*/ + +ulint +row_undo_build_dict_undo_list( +/*==========================*/ + /* out: DB_SUCCESS or error code */ + undo_node_t* node) /* in: row undo node */ +{ + trx_t* trx; + dict_undo_t* dict_undo; + ulint err = DB_SUCCESS; + ibool locked_dictionary = FALSE; + ibool thawed_dictionary = FALSE; + + ut_ad(node); + ut_ad(node->state == UNDO_NODE_INSERT); + ut_a(node->trx->dict_operation); + + err = row_undo_dictionary_parse_undo_rec(node); + + if (err != DB_SUCCESS) { + + goto func_exit; + } + + trx = node->trx; + + if (trx->dict_operation_lock_mode == RW_S_LATCH) { + row_mysql_unfreeze_data_dictionary(trx); + + thawed_dictionary = TRUE; + } + + if (trx->dict_operation_lock_mode == 0 + || trx->dict_operation_lock_mode != RW_X_LATCH) { + + row_mysql_lock_data_dictionary(trx); + + locked_dictionary = TRUE; + } + + /* We will do our own deletes */ + trx->table_id = ut_dulint_create(0, 0); + + if (trx->dict_undo_list == NULL) { + dict_undo_create_list(trx); + } + + /* Create an element and append to the list */ + dict_undo = dict_undo_create_element(trx); + + dict_undo->op_type = node->rec_sub_type; + + switch (node->rec_sub_type) { + + case TRX_UNDO_INDEX_CREATE_REC: + + if (node->table && node->index) { + ut_a(node->index->table == node->table); + + dict_undo->data.index = node->index; + } else { + dict_undo->data.index = NULL; + } + + break; + + case TRX_UNDO_TABLE_DROP_REC: + case TRX_UNDO_TABLE_CREATE_REC: + + dict_undo->data.table.old_table = dict_table_get_low( + node->new_table_name); + + break; + + case TRX_UNDO_TABLE_RENAME_REC: + + dict_undo->data.table.old_table = dict_table_get_low( + node->old_table_name); + + dict_undo->data.table.tmp_table = dict_table_get_low( + node->tmp_table_name); + + dict_undo->data.table.new_table = dict_table_get_low( + node->new_table_name); + + if (dict_undo->data.table.tmp_table + && dict_undo->data.table.old_table + && dict_undo->data.table.new_table) { + + /* This can't happen */ + ut_error; + } + + break; + + default: + + ut_error; + + break; + } + + if (locked_dictionary) { + row_mysql_unlock_data_dictionary(trx); + } + + if (thawed_dictionary) { + row_mysql_freeze_data_dictionary(trx); + } + +func_exit: + trx_undo_rec_release(node->trx, 
node->undo_no); + + return(err); +} + +ulint +row_undo_dictionary( +/*================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + dict_undo_t* dict_undo) /* in: dict undo info */ +{ + ulint err = DB_SUCCESS; + + switch (dict_undo->op_type) { + case TRX_UNDO_INDEX_CREATE_REC: + + err = row_merge_remove_index( + dict_undo->data.index, dict_undo->data.index->table, + trx); + + break; + + /* TODO: We are REDOing the DROP ? */ + case TRX_UNDO_TABLE_DROP_REC: + case TRX_UNDO_TABLE_CREATE_REC: + + if (dict_undo->data.table.old_table) { + + err = row_drop_table_for_mysql_no_commit( + dict_undo->data.table.old_table->name, + trx, FALSE); + } + + break; + + case TRX_UNDO_TABLE_RENAME_REC: + if (!dict_undo->data.table.new_table) { + + /* Old name to tmp name succeeded and new name to old + name succeeded too. We have to be very careful here as + the user could loose the entire table if not done + carefully.*/ + ut_ad(dict_undo->data.table.old_table); + + err = row_rename_table_for_mysql( + dict_undo->data.table.old_table->name, + dict_undo->data.table.new_table->name, + trx, FALSE); + + if (err == DB_SUCCESS) { + err = row_rename_table_for_mysql( + dict_undo->data.table.tmp_table->name, + dict_undo->data.table.old_table->name, + trx, FALSE); + } + + if (err == DB_SUCCESS) { + + err = row_drop_table_for_mysql_no_commit( + dict_undo->data.table.new_table->name, + trx, FALSE); + } + + } else if (dict_undo->data.table.old_table) { + /* Rename to tmp failed.*/ + + ut_ad(!dict_undo->data.table.tmp_table); + + if (dict_undo->data.table.new_table) { + + err = row_drop_table_for_mysql_no_commit( + dict_undo->data.table.new_table->name, + trx, FALSE); + } + + } else if (dict_undo->data.table.tmp_table) { + /* Rename to tmp was OK. We need to UNDO it.*/ + + ut_ad(!dict_undo->data.table.old_table); + + err = row_rename_table_for_mysql( + dict_undo->data.table.tmp_table->name, + dict_undo->data.table.old_table->name, + trx, FALSE); + + if (dict_undo->data.table.new_table) { + + err = row_drop_table_for_mysql_no_commit( + dict_undo->data.table.new_table->name, + trx, FALSE); + } + + } else { + /* Shouldn't happen */ + ut_error; + } + + default: + ut_error; + } + + return(err); +} + diff --git a/trx/trx0rec.c b/trx/trx0rec.c index 20a3b7142e4..d14ce08ac18 100644 --- a/trx/trx0rec.c +++ b/trx/trx0rec.c @@ -130,6 +130,105 @@ trx_undo_left( return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END); } +/************************************************************************** +Get the pointer to where the data for the undo log record will be written.*/ +static +byte* +trx_undo_page_get_ptr( +/*==================*/ + /* out: ptr to where the undo log + record data will be written, + 0 if not enough space.*/ + page_t* undo_page, /* in: undo log page */ + ulint need) /* in: need these man bytes */ +{ + byte* ptr; /* pointer within undo_page */ + ulint first_free; /* offset within undo page */ + + ut_ad(undo_page); + + ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); + + first_free = mach_read_from_2( + undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE); + + /* Start writing the undo information from the first free + bytes in the undo page */ + ptr = undo_page + first_free; + + ut_ad(first_free <= UNIV_PAGE_SIZE); + + /* NOTE: the value need must be big enough such that the + general fields written below fit on the undo log page */ + + if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < need)) { + + /* Error, not enough space */ + ptr 
= 0; + + } else { + /* Reserve 2 bytes for the pointer to the next undo log + record */ + ptr += 2; + } + + return(ptr); +} + +/************************************************************************** +Set the next and previous pointers in the undo page for the undo record +that was written to ptr. Update the first free value by the number of bytes +written for this undo record.*/ +static +ulint +trx_undo_page_set_next_prev_and_add( +/*================================*/ + /* out: offset of the inserted entry + on the page if succeeded, 0 if fail */ + page_t* undo_page, /* in/out: undo log page */ + byte* ptr, /* in: ptr up to where data has been + written on this undo page. */ + mtr_t* mtr) /* in: mtr */ +{ + ulint first_free; /* offset within undo_page */ + ulint end_of_rec; /* offset within undo_page */ + byte* ptr_to_first_free; + /* pointer within undo_page + that points to the next free + offset value within undo_page.*/ + + ut_ad(ptr > undo_page); + ut_ad(ptr < undo_page + UNIV_PAGE_SIZE); + + if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) { + + return(0); + } + + ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE; + + first_free = mach_read_from_2(ptr_to_first_free); + + /* Write offset of the previous undo log record */ + mach_write_to_2(ptr, first_free); + ptr += 2; + + end_of_rec = ptr - undo_page; + + /* Write offset of the next undo log record */ + mach_write_to_2(undo_page + first_free, end_of_rec); + + /* Update the offset to first free undo record */ + mach_write_to_2(ptr_to_first_free, end_of_rec); + + /* Write this log entry to the UNDO log */ + trx_undof_page_add_undo_rec_log(undo_page, first_free, + end_of_rec, mtr); + + return(first_free); +} + /************************************************************************** Reports in the undo log of an insert of a clustered index record. 
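The dictionary record report functions added further below all follow the same shape, and this function now shares the same tail helper; roughly (an illustrative sketch only, the <...> parts differ per record type):

	ptr = trx_undo_page_get_ptr(undo_page, <max bytes needed>);

	if (UNIV_UNLIKELY(!ptr)) {
		return(0);
	}

	*ptr++ = <record type byte>;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);

	<write the record type specific payload>

	return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));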
*/ static @@ -147,9 +246,6 @@ trx_undo_page_report_insert( { ulint first_free; byte* ptr; - ulint len; - const dfield_t* field; - ulint flen; ulint i; ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR @@ -172,31 +268,24 @@ trx_undo_page_report_insert( ptr += 2; /* Store first some general parameters to the undo log */ - mach_write_to_1(ptr, TRX_UNDO_INSERT_REC); - ptr++; - - len = mach_dulint_write_much_compressed(ptr, trx->undo_no); - ptr += len; - - len = mach_dulint_write_much_compressed(ptr, (index->table)->id); - ptr += len; + *ptr++ = TRX_UNDO_INSERT_REC; + ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); + ptr += mach_dulint_write_much_compressed(ptr, index->table->id); /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record to be inserted in the clustered index */ for (i = 0; i < dict_index_get_n_unique(index); i++) { - field = dtuple_get_nth_field(clust_entry, i); - - flen = dfield_get_len(field); + const dfield_t* field = dtuple_get_nth_field(clust_entry, i); + ulint flen = dfield_get_len(field); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - len = mach_write_compressed(ptr, flen); - ptr += len; + ptr += mach_write_compressed(ptr, flen); if (flen != UNIV_SQL_NULL) { if (trx_undo_left(undo_page, ptr) < flen) { @@ -209,27 +298,192 @@ trx_undo_page_report_insert( } } - /*----------------------------------------*/ - /* Write pointers to the previous and the next undo log records */ + return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); +} - if (trx_undo_left(undo_page, ptr) < 2) { +/************************************************************************** +Reports in the undo log of an index create */ +static +ulint +trx_undo_page_report_index_create( +/*==============================*/ + /* out: offset of the inserted entry + on the page if succeed, 0 if fail */ + page_t* undo_page, /* in: undo log page */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: index */ + mtr_t* mtr) /* in: mtr */ +{ + byte* ptr; + ut_ad(undo_page && trx && index && mtr); + + /* Get the pointer to where we will write our undo data. */ + + ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + 11 + 11); + + if (UNIV_UNLIKELY(!ptr)) { return(0); } - mach_write_to_2(ptr, first_free); - ptr += 2; + /* This is our internal dictionary undo log record. 
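After the 2 bytes that trx_undo_page_get_ptr() reserves for the next-record pointer, the fields written here are, in order (each much-compressed dulint takes at most the 11 bytes reserved for it in the size calculation above):

	1 byte		TRX_UNDO_DICTIONARY_REC		(record type)
	<= 11 bytes	trx->undo_no			(much-compressed dulint)
	1 byte		TRX_UNDO_INDEX_CREATE_REC	(sub type)
	<= 11 bytes	index->table->id		(much-compressed dulint)
	<= 11 bytes	index->id			(much-compressed dulint)

row_undo_dictionary_parse_undo_rec() reads the sub type, table id and index id back in the same order.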
*/ + *ptr++ = TRX_UNDO_DICTIONARY_REC; - mach_write_to_2(undo_page + first_free, ptr - undo_page); + ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); - mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, - ptr - undo_page); + /* The sub type (discriminator) of this undo dictionary record */ + *ptr++ = TRX_UNDO_INDEX_CREATE_REC; - /* Write the log entry to the REDO log of this change in the UNDO - log */ - trx_undof_page_add_undo_rec_log(undo_page, first_free, - ptr - undo_page, mtr); - return(first_free); + /* For index create, we need both the table id and the index id + to be stored in the undo log record.*/ + + ptr += mach_dulint_write_much_compressed(ptr, index->table->id); + ptr += mach_dulint_write_much_compressed(ptr, index->id); + + return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); +} + +/************************************************************************** +Reports in the undo log of a table create */ +static +ulint +trx_undo_page_report_table_create( +/*==============================*/ + /* out: offset of the inserted entry + on the page if succeed, 0 if fail */ + page_t* undo_page, /* in: undo log page */ + trx_t* trx, /* in: transaction */ + const char* table_name, /* in: table name */ + mtr_t* mtr) /* in: mtr */ +{ + byte* ptr; + ulint name_len; + + ut_ad(undo_page && trx && table_name && mtr); + + name_len = strlen(table_name) + 1; + + /* Get the pointer to where we will write our undo data */ + + ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + name_len); + + if (UNIV_UNLIKELY(!ptr)) { + return(0); + } + + /* The type (discriminator) of this undo log */ + *ptr++ = TRX_UNDO_DICTIONARY_REC; + + ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); + + /* The sub type (discriminator) of this dictionary undo log */ + *ptr++ = TRX_UNDO_TABLE_CREATE_REC; + + /* For table create we need to store table name */ + memcpy(ptr, table_name, name_len); + ptr += name_len; + + return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); +} + +/************************************************************************** +Reports in the undo log of a table drop */ +static +ulint +trx_undo_page_report_table_drop( +/*============================*/ + /* out: offset of the inserted entry + on the page if succeed, 0 if fail */ + page_t* undo_page, /* in: undo log page */ + trx_t* trx, /* in: transaction */ + const char* table_name, /* in: table name */ + mtr_t* mtr) /* in: mtr */ +{ + byte* ptr; + ulint name_len; + + ut_ad(undo_page && trx && table_name && mtr); + + name_len = strlen(table_name) + 1; + + /* Get the pointer to where we will write our undo data */ + + ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + name_len); + + if (UNIV_UNLIKELY(!ptr)) { + return(0); + } + + *ptr++ = TRX_UNDO_DICTIONARY_REC; + + ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); + + /* The sub type (discriminator) of this dictionary undo log */ + *ptr++ = TRX_UNDO_TABLE_DROP_REC; + + /* For table drop we need to store a table name */ + memcpy(ptr, table_name, name_len); + ptr += name_len; + + return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); +} + +/************************************************************************** +Reports in the undo log of a table rename */ +static +ulint +trx_undo_page_report_table_rename( +/*==============================*/ + /* out: offset of the inserted entry + on the page if succeed, 0 if fail */ + page_t* undo_page, /* in: undo log page */ + trx_t* trx, /* in: transaction */ + const char* 
new_table_name, /* in: new table name */ + const char* old_table_name, /* in: old table name */ + const char* tmp_table_name, /* in: the temp name */ + mtr_t* mtr) /* in: mtr */ +{ + byte* ptr; + ulint new_name_len; + ulint old_name_len; + ulint tmp_name_len; + + ut_ad(undo_page && trx && new_table_name && old_table_name && mtr); + + new_name_len = strlen(new_table_name) + 1; + old_name_len = strlen(old_table_name) + 1; + tmp_name_len = strlen(tmp_table_name) + 1; + + /* Get the pointer to where we will write our undo data. */ + + ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + + new_name_len + old_name_len + + tmp_name_len); + + if (UNIV_UNLIKELY(!ptr)) { + return(0); + } + + /* The type (discriminator) of this undo log */ + *ptr++ = TRX_UNDO_DICTIONARY_REC; + ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); + + /* The sub type (discriminator) of this dictionary undo log */ + *ptr++ = TRX_UNDO_TABLE_RENAME_REC; + + /* For table rename we need to store the new table name and + the old table name */ + + memcpy(ptr, new_table_name, new_name_len); + ptr += new_name_len; + + memcpy(ptr, old_table_name, old_name_len); + ptr += old_name_len; + + memcpy(ptr, tmp_table_name, tmp_name_len); + ptr += tmp_name_len; + + return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); } /************************************************************************** @@ -251,7 +505,6 @@ trx_undo_rec_get_pars( dulint* table_id) /* out: table id */ { byte* ptr; - ulint len; ulint type_cmpl; ptr = undo_rec + 2; @@ -270,12 +523,10 @@ trx_undo_rec_get_pars( *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT; *undo_no = mach_dulint_read_much_compressed(ptr); - len = mach_dulint_get_much_compressed_size(*undo_no); - ptr += len; + ptr += mach_dulint_get_much_compressed_size(*undo_no); *table_id = mach_dulint_read_much_compressed(ptr); - len = mach_dulint_get_much_compressed_size(*table_id); - ptr += len; + ptr += mach_dulint_get_much_compressed_size(*table_id); return(ptr); } @@ -671,8 +922,6 @@ trx_undo_update_rec_get_sys_cols( dulint* roll_ptr, /* out: roll ptr */ ulint* info_bits) /* out: info bits state */ { - ulint len; - /* Read the state of the info bits */ *info_bits = mach_read_from_1(ptr); ptr += 1; @@ -680,12 +929,10 @@ trx_undo_update_rec_get_sys_cols( /* Read the values of the system columns */ *trx_id = mach_dulint_read_compressed(ptr); - len = mach_dulint_get_compressed_size(*trx_id); - ptr += len; + ptr += mach_dulint_get_compressed_size(*trx_id); *roll_ptr = mach_dulint_read_compressed(ptr); - len = mach_dulint_get_compressed_size(*roll_ptr); - ptr += len; + ptr += mach_dulint_get_compressed_size(*roll_ptr); return(ptr); } @@ -1142,6 +1389,189 @@ trx_undo_report_row_operation( return(DB_SUCCESS); } +/*************************************************************************** +Writes information to an undo log about dictionary operation e.g. +rename_table, create_table, create_index, drop table. This information +is used in a rollback of the transaction. */ + +ulint +trx_undo_report_dict_operation( +/*===========================*/ + /* out: DB_SUCCESS or error code */ + ulint op_type, /* in: TRX_UNDO_TABLE_CREATE_OP, + TRX_UNDO_TABLE_RENAME_OP, + TRX_UNDO_TABLE_DROP_OP, or + TRX_UNDO_INDEX_CREATE_OP */ + trx_t* trx, /* in: trx */ + dict_index_t* index, /* in: + if TRX_UNDO_INDEX_CREATE_OP + index to be created*/ + const char* table_name, /* in: table name or NULL, used in + create table, rename table and + drop table*/ + const char* old_table_name, /* in: old table name or NULL. 
+ used in rename table */ + const char* tmp_table_name, /* in: the intermediate name used + for renaming */ + dulint* roll_ptr) /* out: rollback pointer to the + inserted undo log record */ +{ + trx_undo_t* undo; + buf_block_t* undo_block; + ulint offset; + ulint page_no; + trx_rseg_t* rseg; + mtr_t mtr; + + ut_ad(trx); + +#ifdef UNIV_DEBUG + switch (op_type) { + case TRX_UNDO_TABLE_RENAME_OP: + ut_ad(old_table_name); + case TRX_UNDO_TABLE_DROP_OP: + case TRX_UNDO_TABLE_CREATE_OP: + ut_ad(table_name); + break; + + case TRX_UNDO_INDEX_CREATE_OP: + ut_ad(index); + break; + default: + ut_error; + } +#endif + + rseg = trx->rseg; + + mutex_enter(&(trx->undo_mutex)); + + /* If the undo log is not assigned yet, assign one */ + + if (trx->insert_undo == NULL) { + + trx_undo_assign_undo(trx, TRX_UNDO_INSERT); + } + + undo = trx->insert_undo; + + if (undo == NULL) { + /* Did not succeed: out of space */ + mutex_exit(&(trx->undo_mutex)); + + return(DB_OUT_OF_FILE_SPACE); + } + + page_no = undo->last_page_no; + + mtr_start(&mtr); + + for (;;) { + undo_block = buf_page_get_gen(undo->space, undo->zip_size, + page_no, RW_X_LATCH, + undo->guess_block, BUF_GET, + __FILE__, __LINE__, &mtr); + +#ifdef UNIV_SYNC_DEBUG + buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE); +#endif /* UNIV_SYNC_DEBUG */ + + switch (op_type) { + + case TRX_UNDO_TABLE_CREATE_OP: + offset = trx_undo_page_report_table_create( + undo_block->frame, trx, table_name, &mtr); + break; + + case TRX_UNDO_INDEX_CREATE_OP: + offset = trx_undo_page_report_index_create( + undo_block->frame, trx, index, &mtr); + break; + + case TRX_UNDO_TABLE_RENAME_OP: + offset = trx_undo_page_report_table_rename( + undo_block->frame, trx, table_name, + old_table_name, tmp_table_name, &mtr); + break; + + case TRX_UNDO_TABLE_DROP_OP: + offset = trx_undo_page_report_table_drop( + undo_block->frame, trx, table_name, &mtr); + break; + + default: + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Error]: Undefined op_type = %lu " + "at trx_undo_report_dict_operation\n", + (ulong) op_type); + + mutex_enter(&kernel_mutex); + trx_print(stderr, trx, 1024); + mutex_exit(&kernel_mutex); + + return(DB_ERROR); + } + + if (offset == 0) { + /* The record did not fit on the page. We erase the + end segment of the undo log page and write a log + record of it: this is to ensure that in the debug + version the replicate page constructed using the log + records stays identical to the original page */ + + trx_undo_erase_page_end(undo_block->frame, &mtr); + } + + mtr_commit(&mtr); + + if (offset != 0) { + /* Success */ + + break; + } + + ut_ad(page_no == undo->last_page_no); + + /* We have to extend the undo log by one page */ + + mtr_start(&mtr); + + /* When we add a page to an undo log, this is analogous to + a pessimistic insert in a B-tree, and we must reserve the + counterpart of the tree latch, which is the rseg mutex. 
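That is, rseg->mutex is held only across the trx_undo_add_page() call below; the mutex is released as soon as the page has been added (or FIL_NULL has been returned because the tablespace is full), and the loop then retries the write on the new last page.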
*/ + + mutex_enter(&(rseg->mutex)); + + page_no = trx_undo_add_page(trx, undo, &mtr); + + mutex_exit(&(rseg->mutex)); + + if (page_no == FIL_NULL) { + /* Did not succeed: out of space */ + + mutex_exit(&(trx->undo_mutex)); + mtr_commit(&mtr); + + return(DB_OUT_OF_FILE_SPACE); + } + } + + undo->empty = FALSE; + undo->top_page_no = page_no; + undo->top_offset = offset; + undo->top_undo_no = trx->undo_no; + undo->guess_block = undo_block; + + UT_DULINT_INC(trx->undo_no); + + mutex_exit(&(trx->undo_mutex)); + + *roll_ptr = trx_undo_build_roll_ptr(TRUE, rseg->id, page_no, offset); + + return(DB_SUCCESS); +} + /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/ /********************************************************************** diff --git a/trx/trx0roll.c b/trx/trx0roll.c index 2f5b7b170ec..cb02462ee8f 100644 --- a/trx/trx0roll.c +++ b/trx/trx0roll.c @@ -425,7 +425,8 @@ trx_rollback_or_clean_all_without_sess( dict_table_t* table; ib_longlong rows_to_undo; const char* unit = ""; - int err; + int err = DB_SUCCESS; + ibool dictionary_locked = FALSE; mutex_enter(&kernel_mutex); @@ -530,8 +531,10 @@ loop: trx->mysql_process_no = os_proc_get_number(); + /* TODO: Doesn't seem right */ if (trx->dict_operation) { row_mysql_lock_data_dictionary(trx); + dictionary_locked = TRUE; } que_run_threads(thr); @@ -552,7 +555,8 @@ loop: mutex_exit(&kernel_mutex); - if (trx->dict_operation) { + if (trx->dict_operation && !ut_dulint_is_zero(trx->table_id)) { + /* If the transaction was for a dictionary operation, we drop the relevant table, if it still exists */ @@ -573,9 +577,35 @@ loop: ut_a(err == (int) DB_SUCCESS); } + } else if (trx->dict_undo_list) { + + dict_undo_t* dict_undo; + + ut_a(trx->dict_undo_list); + + dict_undo = UT_LIST_GET_FIRST(*trx->dict_undo_list); + + fputs("InnoDB: UNDO dict entries\n", stderr); + + while (dict_undo && err == DB_SUCCESS) { + + dict_undo = UT_LIST_GET_NEXT(node, dict_undo); + + if (dict_undo) { + err = row_undo_dictionary(trx, dict_undo); + } + } + + ut_a(err == (int) DB_SUCCESS); + + dict_undo_free_list(trx); + + mutex_enter(&kernel_mutex); + trx_commit_off_kernel(trx); + mutex_exit(&kernel_mutex); } - if (trx->dict_operation) { + if (dictionary_locked) { row_mysql_unlock_data_dictionary(trx); } @@ -1245,7 +1275,11 @@ trx_finish_rollback_off_kernel( } #endif /* UNIV_DEBUG */ - trx_commit_off_kernel(trx); + /* If there are dict UNDO records that need to be undone then + we commit the transaction after these dictionary changes are undone.*/ + if (!trx->dict_undo_list) { + trx_commit_off_kernel(trx); + } /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and send reply messages to them */ diff --git a/trx/trx0trx.c b/trx/trx0trx.c index 0deeb339668..e46ec5f2224 100644 --- a/trx/trx0trx.c +++ b/trx/trx0trx.c @@ -127,6 +127,10 @@ trx_create( trx->must_flush_log_later = FALSE; trx->dict_operation = FALSE; + trx->table_id = ut_dulint_create(0, 0); + trx->dict_undo_list = NULL; + trx->dict_redo_list = NULL; + trx->sync_cb = NULL; trx->mysql_thd = NULL; trx->mysql_query_str = NULL; @@ -153,6 +157,7 @@ trx_create( trx->undo_no_arr = NULL; trx->error_state = DB_SUCCESS; + trx->error_key_num = 0; trx->detailed_error[0] = '\0'; trx->sess = sess; @@ -349,6 +354,8 @@ trx_free( trx->global_read_view = NULL; ut_a(trx->read_view == NULL); + ut_a(trx->dict_undo_list == NULL); + ut_a(trx->dict_redo_list == NULL); mem_free(trx); } @@ -740,6 +747,10 @@ trx_commit_off_kernel( ut_ad(mutex_own(&kernel_mutex)); + /* Can't commit if we have dictionary UNDO 
records */ + ut_a(!trx->dict_undo_list); + ut_a(!trx->dict_redo_list); + trx->must_flush_log_later = FALSE; rseg = trx->rseg; @@ -1558,6 +1569,14 @@ trx_commit_for_mysql( ut_a(trx); + if (trx->sync_cb) { + ulint err; + + err = trx->sync_cb(trx, TRUE); + ut_a(err); + trx->sync_cb = NULL; + } + trx->op_info = "committing"; /* If we are doing the XA recovery of prepared transactions, then @@ -1575,7 +1594,7 @@ trx_commit_for_mysql( trx->sess = trx_dummy_sess; } - + mutex_exit(&kernel_mutex); trx_start_if_not_started(trx);
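/* A caller of trx_undo_report_dict_operation() added in trx0rec.c above would look roughly as follows; this is only an illustrative sketch, the local variable names are made up and the real call sites (presumably in the DDL code paths) are not shown here:

	dulint	roll_ptr;
	ulint	err;

	err = trx_undo_report_dict_operation(
		TRX_UNDO_INDEX_CREATE_OP, trx, index,
		NULL, NULL, NULL, &roll_ptr);

	if (err != DB_SUCCESS) {
		<handle DB_OUT_OF_FILE_SPACE or DB_ERROR>
	}
*/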