From d2f7fe35584a75d5bc3a9a6950cc15d264e31241 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 20 Jul 2006 15:52:48 +0500 Subject: [PATCH 1/3] Bug#20471 LIKE search fails with indexed utf8 char column The main problem was already fixed by Igor under terms of 16674. Adding some additional minor fixes and tests. include/m_ctype.h: Adding reference to CHARSET_INFO.txt mysql-test/r/ctype_utf8.result: Adding test case mysql-test/t/ctype_utf8.test: Adding test case strings/CHARSET_INFO.txt: Adding comment about max_sort_char strings/ctype-mb.c: Restoring the behavior that non-Unicode character sets use 0xFF as the pad character for max_str. Only Unicode character sets use wc_mb. strings/ctype-utf8.c: Changed max_sort_char for UTF8 from U+00FF to U+FFFF. --- include/m_ctype.h | 4 ++ mysql-test/r/ctype_utf8.result | 75 ++++++++++++++++++++++++++++++++++ mysql-test/t/ctype_utf8.test | 70 +++++++++++++++++++++++++++++++ strings/CHARSET_INFO.txt | 12 +++++- strings/ctype-mb.c | 21 ++++++++-- strings/ctype-utf8.c | 2 +- 6 files changed, 177 insertions(+), 7 deletions(-) diff --git a/include/m_ctype.h b/include/m_ctype.h index cd1dac9dde8..b9ed39414bb 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -108,6 +108,8 @@ enum my_lex_states struct charset_info_st; + +/* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct my_collation_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); @@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; +/* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct my_charset_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); @@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler; extern MY_CHARSET_HANDLER my_charset_ucs2_handler; +/* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct charset_info_st { uint 
number; diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 4ceacaffcbb..61083510854 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -1124,6 +1124,81 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; +set names utf8; +create table t1 (s1 char(5) character set utf8); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; +before_delete_general_ci +ペテルグル +delete from t1 where s1 = 'Y'; +select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; +after_delete_general_ci +ペテルグル +drop table t1; +set names utf8; +create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; +before_delete_unicode_ci +ペテルグル +delete from t1 where s1 = 'Y'; +select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; +after_delete_unicode_ci +ペテルグル +drop table t1; +set names utf8; +create table t1 (s1 char(5) character set utf8 collate utf8_bin); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; +before_delete_bin +ペテルグル +delete from t1 where s1 = 'Y'; +select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; +after_delete_bin +ペテルグル +drop table t1; +set names utf8; +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8 collate utf8_general_ci; +insert into t1 values ('あいうえおかきくけこさしすせそ'); +insert into t1 values ('さしすせそかきくけこあいうえお'); +select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; +gci1 +さしすせそかきくけこあいうえお +select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; +gci2 +あいうえおかきくけこさしすせそ +drop table t1; +set names utf8; +create table t1 (a 
varchar(30) not null primary key) +engine=innodb default character set utf8 collate utf8_unicode_ci; +insert into t1 values ('あいうえおかきくけこさしすせそ'); +insert into t1 values ('さしすせそかきくけこあいうえお'); +select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; +uci1 +さしすせそかきくけこあいうえお +select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; +uci2 +あいうえおかきくけこさしすせそ +drop table t1; +set names utf8; +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8 collate utf8_bin; +insert into t1 values ('あいうえおかきくけこさしすせそ'); +insert into t1 values ('さしすせそかきくけこあいうえお'); +select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%'; +bin1 +さしすせそかきくけこあいうえお +select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ'; +bin2 +あいうえおかきくけこさしすせそ +drop table t1; SET NAMES utf8; CREATE TABLE t1 (id int PRIMARY KEY, a varchar(16) collate utf8_unicode_ci NOT NULL default '', diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index b1d485ad1ce..0f6f6978e6c 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb check table t1; drop table t1; +# +# Bug#20471 LIKE search fails with indexed utf8 char column +# +set names utf8; +create table t1 (s1 char(5) character set utf8); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; +delete from t1 where s1 = 'Y'; +select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; +drop table t1; + +set names utf8; +create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; +delete from t1 where s1 = 'Y'; +select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; +drop table t1; + +set names utf8; 
+create table t1 (s1 char(5) character set utf8 collate utf8_bin); +insert into t1 values +('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); +create index it1 on t1 (s1); +select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; +delete from t1 where s1 = 'Y'; +select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; +drop table t1; + +# additional tests from duplicate bug#20744 MySQL return no result + +set names utf8; +--disable_warnings +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8 collate utf8_general_ci; +--enable_warnings +insert into t1 values ('あいうえおかきくけこさしすせそ'); +insert into t1 values ('さしすせそかきくけこあいうえお'); +select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; +select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; +drop table t1; + +set names utf8; +--disable_warnings +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8 collate utf8_unicode_ci; +--enable_warnings +insert into t1 values ('あいうえおかきくけこさしすせそ'); +insert into t1 values ('さしすせそかきくけこあいうえお'); +select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; +select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; +drop table t1; + +set names utf8; +--disable_warnings +create table t1 (a varchar(30) not null primary key) +engine=innodb default character set utf8 collate utf8_bin; +--enable_warnings +insert into t1 values ('あいうえおかきくけこさしすせそ'); +insert into t1 values ('さしすせそかきくけこあいうえお'); +select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%'; +select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ'; +drop table t1; + + + # # Bug#14896: Comparison with a key in a partial index over mb chararacter field # diff --git a/strings/CHARSET_INFO.txt b/strings/CHARSET_INFO.txt index f7a10f95880..3fd262c6f12 100644 --- a/strings/CHARSET_INFO.txt +++ b/strings/CHARSET_INFO.txt @@ -33,7 +33,7 @@ typedef struct charset_info_st uint strxfrm_multiply; uint mbminlen; uint mbmaxlen; - char max_sort_char; /* For LIKE optimization 
*/ + uint16 max_sort_char; /* For LIKE optimization */ MY_CHARSET_HANDLER *cset; MY_COLLATION_HANDLER *coll; @@ -134,7 +134,15 @@ Misc fields mbmaxlen - maximum multibyte sequence length. 1 for 8bit charsets. Can be also 2 or 3. - + max_sort_char - for LIKE range + in case of 8bit character sets - native code + of maximum character (max_str pad byte); + in case of UTF8 and UCS2 - Unicode code of the maximum + possible character (usually U+FFFF). This code is + converted to multibyte representation (usually 0xEFBFBF) + and then used as a pad sequence for max_str. + in case of other multibyte character sets - + max_str pad byte (usually 0xFF). MY_CHARSET_HANDLER ================== diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 4f57f7c78e4..6f63592c459 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), /* - Write max key: create a buffer with multibyte + Write max key: +- for non-Unicode character sets: + just set to 255. +- for Unicode character set (utf-8): + create a buffer with multibyte representation of the max_sort_char character, and copy it into max_str in a loop. 
*/ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) { char buf[10]; - char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, - (uchar*) buf + sizeof(buf)); + char buflen; + + if (!(cs->state & MY_CS_UNICODE)) + { + bfill(str, end - str, 255); + return; + } + + buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, + (uchar*) buf + sizeof(buf)); + DBUG_ASSERT(buflen > 0); do { @@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler = my_strnncoll_mb_bin, my_strnncollsp_mb_bin, my_strnxfrm_mb_bin, - my_like_range_simple, + my_like_range_mb, my_wildcmp_mb_bin, my_strcasecmp_mb_bin, my_instr_mb, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 94e8e6ba797..79239394816 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin= 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ - 255, /* max_sort_char */ + 0xFFFF, /* max_sort_char */ 0, /* escape_with_backslash_is_dangerous */ &my_charset_utf8_handler, &my_collation_mb_bin_handler From 62dd7e5183b6c01b7ba996a5f09af2cffdbd8a25 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 11 Sep 2006 14:50:46 +0500 Subject: [PATCH 2/3] Bug#20922 mysql removes a name of first column in a table 0xFF is internal separator for SET|ENUM names. If this symbol is present in SET|ENUM names then we replace it with ','(deprecated symbol for SET|ENUM names) during frm creation and restore to 0xFF during frm opening mysql-test/r/type_enum.result: Bug#20922 mysql removes a name of first column in a table test case mysql-test/t/type_enum.test: Bug#20922 mysql removes a name of first column in a table test case sql/table.cc: Bug#20922 mysql removes a name of first column in a table Replace all ',' symbols with NAMES_SEP_CHAR in interval names. 
sql/unireg.cc: Bug#20922 mysql removes a name of first column in a table if NAMES_SEP_CHAR symbols are present in interval name then replace all NAMES_SEP_CHAR symbols with ',' --- mysql-test/r/type_enum.result | 9 +++++++++ mysql-test/t/type_enum.test | 9 +++++++++ sql/table.cc | 16 +++++++++++++++- sql/unireg.cc | 15 +++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/type_enum.result b/mysql-test/r/type_enum.result index ab3c441a7e2..0fe3f674fba 100644 --- a/mysql-test/r/type_enum.result +++ b/mysql-test/r/type_enum.result @@ -1745,3 +1745,12 @@ create table t1 (a set('x','y') default 'x'); alter table t1 alter a set default 'z'; ERROR 42000: Invalid default value for 'a' drop table t1; +create table t1 (f1 int); +alter table t1 add f2 enum(0xFFFF); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `f1` int(11) default NULL, + `f2` enum('') default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t1; diff --git a/mysql-test/t/type_enum.test b/mysql-test/t/type_enum.test index 0d479f312cd..68f5664c36d 100644 --- a/mysql-test/t/type_enum.test +++ b/mysql-test/t/type_enum.test @@ -127,4 +127,13 @@ create table t1 (a set('x','y') default 'x'); alter table t1 alter a set default 'z'; drop table t1; + +# +# Bug#20922 mysql removes a name of first column in a table +# +create table t1 (f1 int); +alter table t1 add f2 enum(0xFFFF); +show create table t1; +drop table t1; + # End of 4.1 tests diff --git a/sql/table.cc b/sql/table.cc index 7587531b2f9..f22caf36679 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -387,7 +387,21 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag, count))) goto err_not_open; for (count= 0; count < interval->count; count++) - interval->type_lengths[count]= strlen(interval->type_names[count]); + { + char *val= (char*) interval->type_names[count]; + interval->type_lengths[count]= strlen(val); + /* + Replace all ',' symbols with NAMES_SEP_CHAR. 
+ See the comment in unireg.cc, pack_fields() function + for details. + */ + for (uint cnt= 0 ; cnt < interval->type_lengths[count] ; cnt++) + { + char c= val[cnt]; + if (c == ',') + val[cnt]= NAMES_SEP_CHAR; + } + } interval->type_lengths[count]= 0; } } diff --git a/sql/unireg.cc b/sql/unireg.cc index e3bf763f700..16a0a66c3dd 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -637,6 +637,21 @@ static bool pack_fields(File file, List &create_fields, tmp.append(NAMES_SEP_CHAR); for (const char **pos=field->interval->type_names ; *pos ; pos++) { + char *val= (char*) *pos; + uint str_len= strlen(val); + /* + Note, hack: in old frm NAMES_SEP_CHAR is used to separate + names in the interval (ENUM/SET). To allow names to contain + NAMES_SEP_CHAR, we replace it with a comma before writing frm. + Backward conversion is done during frm file opening, + See table.cc, openfrm() function + */ + for (uint cnt= 0 ; cnt < str_len ; cnt++) + { + char c= val[cnt]; + if (c == NAMES_SEP_CHAR) + val[cnt]= ','; + } tmp.append(*pos); tmp.append(NAMES_SEP_CHAR); } From 342dea0e3525e7944262f5cee330043d4e98aa5b Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 14 Sep 2006 10:05:07 +0500 Subject: [PATCH 3/3] Better comment text (thanks to SergeyP for suggestions made for the b#20471 patch) --- include/m_ctype.h | 2 +- strings/ctype-mb.c | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/m_ctype.h b/include/m_ctype.h index b9ed39414bb..09ebc781c8d 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -109,7 +109,7 @@ enum my_lex_states struct charset_info_st; -/* See strings/CHARSET_INFO.txt about information on this structure */ +/* See strings/CHARSET_INFO.txt for information about this structure */ typedef struct my_collation_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 6f63592c459..bcbc128fc5c 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c 
@@ -449,13 +449,20 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), /* - Write max key: -- for non-Unicode character sets: - just set to 255. -- for Unicode character set (utf-8): - create a buffer with multibyte - representation of the max_sort_char character, - and copy it into max_str in a loop. + Fill the given buffer with 'maximum character' for given charset + SYNOPSIS + pad_max_char() + cs Character set + str Start of buffer to fill + end End of buffer to fill + + DESCRIPTION + Write max key: + - for non-Unicode character sets: + just set to 255. + - for Unicode character set (utf-8): + create a buffer with multibyte representation of the max_sort_char + character, and copy it into max_str in a loop. */ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) {