diff --git a/myisam/mi_search.c b/myisam/mi_search.c index 2f1c37e4f21..24f5db1401d 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -396,9 +396,18 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, matched=prefix_len+left; - for (my_flag=0;left;left--) - if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) - break; + if (sort_order) + { + for (my_flag=0;left;left--) + if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) + break; + } + else + { + for (my_flag=0;left;left--) + if ((my_flag= (int) *vseg++ - (int) *k++)) + break; + } if (my_flag>0) /* mismatch */ break; diff --git a/mysql-test/r/binary.result b/mysql-test/r/binary.result index 000c0c16d77..a4ced14bb12 100644 --- a/mysql-test/r/binary.result +++ b/mysql-test/r/binary.result @@ -59,8 +59,10 @@ concat("-",a,"-",b,"-") -hello-hello- select concat("-",a,"-",b,"-") from t1 where b="hello "; concat("-",a,"-",b,"-") +-hello-hello- select concat("-",a,"-",b,"-") from t1 ignore index (b) where b="hello "; concat("-",a,"-",b,"-") +-hello-hello- alter table t1 modify b tinytext not null, drop key b, add key (b(100)); select concat("-",a,"-",b,"-") from t1; concat("-",a,"-",b,"-") diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 0cc3ea2cf17..cfad82fa053 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -397,3 +397,95 @@ select c as c_a from t1 where c='б'; c_a б drop table t1; +create table t1 (c varchar(30) character set utf8 collate utf8_bin, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +insert into t1 values ('aaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaaa' for key 1 +insert into t1 values ('aaaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaaaa' for key 1 +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +c1 +1 +select c c2 from t1 where c='2'; +c2 +2 +select c c3 
from t1 where c='3'; +c3 +3 +select c cx from t1 where c='x'; +cx +x +select c cy from t1 where c='y'; +cy +y +select c cz from t1 where c='z'; +cz +z +select c ca10 from t1 where c='aaaaaaaaaa'; +ca10 +aaaaaaaaaa +select c cb20 from t1 where c=repeat('b',20); +cb20 +bbbbbbbbbbbbbbbbbbbb +drop table t1; +create table t1 (c char(3) character set utf8 collate utf8_bin, unique (c(2))); +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'aaa' for key 1 +insert into t1 values ('b'); +insert into t1 values ('bb'); +insert into t1 values ('bbb'); +ERROR 23000: Duplicate entry 'bbb' for key 1 +insert into t1 values ('а'); +insert into t1 values ('аа'); +insert into t1 values ('ааа'); +ERROR 23000: Duplicate entry 'ааа' for key 1 +insert into t1 values ('б'); +insert into t1 values ('бб'); +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'ббб' for key 1 +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +insert into t1 values ('ꪪꪪꪪ'); +ERROR 23000: Duplicate entry 'ꪪꪪ' for key 1 +drop table t1; +create table t1 ( +c char(10) character set utf8 collate utf8_bin, +unique key a using btree (c(1)) +) engine=heap; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c` char(10) character set utf8 collate utf8_bin default NULL, + UNIQUE KEY `a` TYPE BTREE (`c`(1)) +) ENGINE=HEAP DEFAULT CHARSET=latin1 +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +insert into t1 values ('aa'); +ERROR 23000: Duplicate entry 'aa' for key 1 +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'aaa' for key 1 +insert into t1 values ('б'); +insert into t1 values ('бб'); +ERROR 23000: Duplicate entry 'бÐ' for key 1 +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'бÐ' for key 1 +select c as c_all from t1 order by c; +c_all +a +b +c +d +e +f +б +select c as c_a from t1 where c='a'; +c_a +a 
+select c as c_a from t1 where c='б'; +c_a +б +drop table t1; diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result index 4800bbf4ecb..167adea6674 100644 --- a/mysql-test/r/endspace.result +++ b/mysql-test/r/endspace.result @@ -19,7 +19,7 @@ select 'a a' > 'a', 'a \0' < 'a'; 1 1 select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' -1 1 1 +1 0 0 create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)); insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); check table t1; diff --git a/mysql-test/r/myisam.result b/mysql-test/r/myisam.result index 354675cd4d4..0109097d3a1 100644 --- a/mysql-test/r/myisam.result +++ b/mysql-test/r/myisam.result @@ -412,6 +412,7 @@ aaa. aaa . select concat(a,'.') from t1 where binary a='aaa'; concat(a,'.') +aaa . aaa. update t1 set a='bbb' where a='aaa'; select concat(a,'.') from t1; diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 0d3bec258bc..a8a02118269 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -189,7 +189,7 @@ drop table t2; # # Bug 4521: unique key prefix interacts poorly with utf8 -# Check keys with prefix compression +# MYISAM: keys with prefix compression, case insensitive collation. 
# create table t1 (c varchar(30) character set utf8, unique(c(10))); insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); @@ -211,7 +211,8 @@ drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 -# Check fixed length keys +# MYISAM: fixed length keys, case insensitive collation +# create table t1 (c char(3) character set utf8, unique (c(2))); insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); insert into t1 values ('a'); @@ -283,3 +284,104 @@ select c as c_all from t1 order by c; select c as c_a from t1 where c='a'; select c as c_a from t1 where c='б'; drop table t1; + + +# +# Bug 4521: unique key prefix interacts poorly with utf8 +# MYISAM: keys with prefix compression, binary collation. +# +create table t1 (c varchar(30) character set utf8 collate utf8_bin, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +--error 1062 +insert into t1 values ('aaaaaaaaaaa'); +--error 1062 +insert into t1 values ('aaaaaaaaaaaa'); +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +select c c2 from t1 where c='2'; +select c c3 from t1 where c='3'; +select c cx from t1 where c='x'; +select c cy from t1 where c='y'; +select c cz from t1 where c='z'; +select c ca10 from t1 where c='aaaaaaaaaa'; +select c cb20 from t1 where c=repeat('b',20); +drop table t1; + +# +# Bug 4521: unique key prefix interacts poorly with utf8 +# MYISAM: fixed length keys, binary collation +# +create table t1 (c char(3) character set utf8 collate utf8_bin, unique (c(2))); +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +--error 1062 +insert into t1 values ('aaa'); +insert into t1 values ('b'); +insert into t1 values ('bb'); +--error 1062 +insert into t1 values ('bbb'); +insert into t1 values ('а'); +insert into t1 values ('аа'); +--error 1062 +insert into t1 values ('ааа'); +insert into t1 values ('б'); 
+insert into t1 values ('бб'); +--error 1062 +insert into t1 values ('ббб'); +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +--error 1062 +insert into t1 values ('ꪪꪪꪪ'); +drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8 +# Check HEAP+HASH, binary collation +# +# This doesn't work correctly yet. +# +#create table t1 ( +#c char(10) character set utf8 collate utf8_bin, +#unique key a using hash (c(1)) +#) engine=heap; +#show create table t1; +#insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +#--error 1062 +#insert into t1 values ('aa'); +#--error 1062 +#insert into t1 values ('aaa'); +#insert into t1 values ('б'); +#--error 1062 +#insert into t1 values ('бб'); +#--error 1062 +#insert into t1 values ('ббб'); +#select c as c_all from t1 order by c; +#select c as c_a from t1 where c='a'; +#select c as c_a from t1 where c='б'; +#drop table t1; + +# +# Bug 4531: unique key prefix interacts poorly with utf8 +# Check HEAP+BTREE, binary collation +# +create table t1 ( +c char(10) character set utf8 collate utf8_bin, +unique key a using btree (c(1)) +) engine=heap; +show create table t1; +insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f'); +--error 1062 +insert into t1 values ('aa'); +--error 1062 +insert into t1 values ('aaa'); +insert into t1 values ('б'); +--error 1062 +insert into t1 values ('бб'); +--error 1062 +insert into t1 values ('ббб'); +select c as c_all from t1 order by c; +select c as c_a from t1 where c='a'; +select c as c_a from t1 where c='б'; +drop table t1; diff --git a/sql/field.h b/sql/field.h index fe06cd96f1a..83c5a71f07f 100644 --- a/sql/field.h +++ b/sql/field.h @@ -357,7 +357,7 @@ public: uint size_of() const { return sizeof(*this); } CHARSET_INFO *charset(void) const { return field_charset; } void set_charset(CHARSET_INFO *charset) { field_charset=charset; } - bool binary() const { return field_charset->state & MY_CS_BINSORT ? 
1 : 0; } + bool binary() const { return field_charset == &my_charset_bin; } uint32 max_length() { return field_length; } friend class create_field; }; diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 39ef6ca855a..7cd534d60b3 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -357,9 +357,11 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const case HA_KEYTYPE_VARTEXT: /* As BDB stores only one copy of equal strings, we can't use key read - on these + on these. Binary collations do support key read though. */ - flags&= ~HA_KEYREAD_ONLY; + if (!(table->key_info[idx].key_part[i].field->charset()->state + & MY_CS_BINSORT)) + flags&= ~HA_KEYREAD_ONLY; break; default: // Keep compiler happy break; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 23bdad1aae5..3c75dba42da 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -303,10 +303,10 @@ int Arg_comparator::set_compare_func(Item_bool_func2 *item, Item_result type) my_coll_agg_error((*a)->collation, (*b)->collation, owner->func_name()); return 1; } - if (my_binary_compare(cmp_collation.collation)) + if (cmp_collation.collation == &my_charset_bin) { /* - We are using binary collation, change to compare byte by byte, + We are using BLOB/BINARY/VARBINARY, change to compare byte by byte, without removing end space */ if (func == &Arg_comparator::compare_string) diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index cc83471f264..e759a5654f1 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -68,31 +68,10 @@ static uchar bin_char_array[] = -/* - Compare two strings. 
Result is sign(first_argument - second_argument) - - SYNOPSIS - my_strnncoll_binary() - cs Chararacter set - s String to compare - slen Length of 's' - t String to compare - tlen Length of 't' - - NOTE - This is used also when comparing with end space removal, as end space - is significant for binary strings - - RETURN - < 0 s < t - 0 s == t - > 0 s > t -*/ - static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen, - my_bool t_is_prefix) + const uchar *s, uint slen, + const uchar *t, uint tlen, + my_bool t_is_prefix) { uint len=min(slen,tlen); int cmp= memcmp(s,t,len); @@ -100,14 +79,105 @@ static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), } +/* + Compare two strings. Result is sign(first_argument - second_argument) + + SYNOPSIS + my_strnncollsp_binary() + cs Chararacter set + s String to compare + slen Length of 's' + t String to compare + tlen Length of 't' + + NOTE + This function is used for real binary strings, i.e. for + BLOB, BINARY(N) and VARBINARY(N). + It does not ignore trailing spaces. + + RETURN + < 0 s < t + 0 s == t + > 0 s > t +*/ + static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { return my_strnncoll_binary(cs,s,slen,t,tlen,0); } +static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *s, uint slen, + const uchar *t, uint tlen, + my_bool t_is_prefix) +{ + uint len=min(slen,tlen); + int cmp= memcmp(s,t,len); + return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen); +} + + +/* + Compare two strings. Result is sign(first_argument - second_argument) + + SYNOPSIS + my_strnncollsp_8bit_bin() + cs Chararacter set + s String to compare + slen Length of 's' + t String to compare + tlen Length of 't' + + NOTE + This function is used for character strings with binary collations. 
+ It ignores trailing spaces. + + RETURN + < 0 s < t + 0 s == t + > 0 s > t +*/ + +static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + const uchar *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) + { + if (*a++ != *b++) + return ((int) a[-1] - (int) b[-1]); + } + if (a_length != b_length) + { + int swap= 0; + /* + Check the first non-space byte in the tail of the longer key. If it is + < ' ', the longer key sorts before the space-padded shorter key. + */ + if (a_length < b_length) + { + /* b is the longer key: scan its tail instead of a's */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return swap ? ((int) ' ' - (int) *a) : ((int) *a - (int) ' '); + } + } + return 0; +} + + /* This function is used for all conversion functions */ static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)), @@ -342,6 +412,20 @@ skip: MY_COLLATION_HANDLER my_collation_8bit_bin_handler = +{ + NULL, /* init */ + my_strnncoll_8bit_bin, + my_strnncollsp_8bit_bin, + my_strnxfrm_bin, + my_like_range_simple, + my_wildcmp_bin, + my_strcasecmp_bin, + my_instr_bin, + my_hash_sort_bin +}; + + +static MY_COLLATION_HANDLER my_collation_binary_handler = { NULL, /* init */ my_strnncoll_binary, @@ -407,5 +491,5 @@ CHARSET_INFO my_charset_bin = 0, /* min_sort_char */ 255, /* max_sort_char */ &my_charset_handler, - &my_collation_8bit_bin_handler + &my_collation_binary_handler }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 7b0dadcfa19..ecafa6356d5 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -360,11 +360,62 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen); } + +/* + Compare two strings. 
+ + SYNOPSIS + my_strnncollsp_mb_bin() + cs Character set + a String to compare + a_length Length of 'a' + b String to compare + b_length Length of 'b' + + NOTE + This function is used for character strings with binary collations. + It ignores trailing spaces. + + RETURN + A negative number if a < b + A positive number if a > b + 0 if strings are equal +*/ + static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - return my_strnncoll_mb_bin(cs,s,slen,t,tlen,0); + const uchar *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) + { + if (*a++ != *b++) + return ((int) a[-1] - (int) b[-1]); + } + if (a_length != b_length) + { + int swap= 0; + /* + Check the first non-space byte in the tail of the longer key. If it is + < ' ', the longer key sorts before the space-padded shorter key. + */ + if (a_length < b_length) + { + /* b is the longer key: scan its tail instead of a's */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return swap ? ((int) ' ' - (int) *a) : ((int) *a - (int) ' '); + } + } + return 0; }