diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 3fc3e850987..2564fcfb97a 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -151,7 +151,12 @@ struct dict_col_struct{ in some of the functions below */ }; -#define DICT_MAX_COL_PREFIX_LEN 512 +/* DICT_MAX_COL_PREFIX_LEN is measured in bytes. Starting from 4.1.6, we +define max col prefix len as 3 * 256, so that one can create a column prefix +index on 256 characters of a TEXT field also in the UTF-8 charset. In that +charset, a character may take at most 3 bytes. */ + +#define DICT_MAX_COL_PREFIX_LEN 768 /* Data structure for a field in an index */ struct dict_field_struct{ @@ -160,9 +165,13 @@ struct dict_field_struct{ ulint order; /* flags for ordering this field: DICT_DESCEND, ... */ ulint prefix_len; /* 0 or the length of the column - prefix in a MySQL index of type, e.g., - INDEX (textcol(25)); must be smaller - than DICT_MAX_COL_PREFIX_LEN */ + prefix in bytes in a MySQL index of + type, e.g., INDEX (textcol(25)); + must be smaller than + DICT_MAX_COL_PREFIX_LEN; NOTE that + in the UTF-8 charset, MySQL + sets this to 3 * the prefix len in + UTF-8 chars */ }; /* Data structure for an index tree */ diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index c796646fc37..152bb0291c3 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -1630,6 +1630,8 @@ row_create_index_for_mysql( trx->op_info = "creating index"; + trx_start_if_not_started(trx); + /* Check that the same column does not appear twice in the index. Starting from 4.0.14, InnoDB should be able to cope with that, but safer not to allow them. 
*/ @@ -1656,9 +1658,16 @@ row_create_index_for_mysql( goto error_handling; } } - } + + /* Check also that prefix_len < DICT_MAX_COL_PREFIX_LEN */ - trx_start_if_not_started(trx); + if (dict_index_get_nth_field(index, i)->prefix_len + >= DICT_MAX_COL_PREFIX_LEN) { + err = DB_TOO_BIG_RECORD; + + goto error_handling; + } + } if (row_mysql_is_recovered_tmp_table(index->table_name)) { diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 132bb835d82..f5da82a8a8c 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -3525,10 +3525,6 @@ create_index( prefix_len = 0; } - if (prefix_len >= DICT_MAX_COL_PREFIX_LEN) { - DBUG_RETURN(-1); - } - /* We assume all fields should be sorted in ascending order, hence the '0': */ @@ -5333,39 +5329,32 @@ innobase_get_at_most_n_mbchars( /* If the charset is multi-byte, then we must find the length of the first at most n chars in the string. If the string contains less characters than n, then we return the length to the end of the last - full character. */ + character. */ if (charset->mbmaxlen > 1) { -/* ulint right_value; */ - /* my_charpos() returns the byte length of the first n_chars - characters, or the end of the last full character */ + characters, or a value bigger than the length of str, if + there were not enough full characters in str. + + Why does the code below work: + Suppose that we are looking for n UTF-8 characters. + + 1) If the string is long enough, then the prefix contains at + least n complete UTF-8 characters + maybe some extra + characters + an incomplete UTF-8 character. No problem in + this case. The function returns the pointer to the + end of the nth character. + + 2) If the string is not long enough, then the string contains + the complete value of a column, that is, only complete UTF-8 + characters, and we can store in the column prefix index the + whole string. 
*/ char_length = my_charpos(charset, str, str + data_len, n_chars); - - /*################################################*/ - /* TODO: my_charpos sometimes returns a non-sensical value - that is BIGGER than data_len: try to fix this bug partly with - these heuristics. This is NOT a complete bug fix! */ - if (char_length > data_len) { char_length = data_len; } - /*################################################*/ - -/* printf("data_len %lu, n_chars %lu, char_len %lu\n", - data_len, n_chars, char_length); - if (data_len < n_chars) { - right_value = data_len; - } else { - right_value = n_chars; - } - - if (right_value != char_length) { - printf("ERRRRRROOORRRRRRRRRRRR!!!!!!!!!\n"); - } -*/ } else { if (data_len < prefix_len) { char_length = data_len;