From 7e7dfcccd64472c9d46b319e997181d0354b668c Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 6 Sep 2004 20:04:22 +0500 Subject: [PATCH] Bug #5324 Bug in UCA collations with LIKE comparisons and INDEX --- include/m_ctype.h | 7 +++ mysql-test/r/ctype_uca.result | 39 ++++++++++++++++ mysql-test/t/ctype_uca.test | 37 +++++++++++++++ strings/ctype-mb.c | 86 +++++++++++++++++++++++++++++++++++ strings/ctype-uca.c | 7 +-- 5 files changed, 173 insertions(+), 3 deletions(-) diff --git a/include/m_ctype.h b/include/m_ctype.h index 65b11f4c06a..16490af7fc3 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -312,6 +312,13 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, char *min_str, char *max_str, uint *min_length, uint *max_length); +my_bool my_like_range_mb(CHARSET_INFO *cs, + const char *ptr, uint ptr_length, + pbool escape, pbool w_one, pbool w_many, + uint res_length, + char *min_str, char *max_str, + uint *min_length, uint *max_length); + my_bool my_like_range_ucs2(CHARSET_INFO *cs, const char *ptr, uint ptr_length, pbool escape, pbool w_one, pbool w_many, diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 94fe15fed26..da4b5bfb663 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž ǁ ǂ ǃ +drop table t1; +SET NAMES utf8; +CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c)); +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8)); +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) +COLLATE utf8_general_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) +COLLATE utf8_general_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4); +SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 +COLLATE ucs2_unicode_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; +CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c)); +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8)); +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci; +c +Μωδαί̈ +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) +COLLATE utf8_unicode_ci ORDER BY c; +c +Μωδ +Μωδαί̈ +DROP TABLE t1; diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index 187d21f9ab7..d9181b19992 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci; select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci; select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci; +drop table t1; + +# +# Bug#5324 +# +SET NAMES utf8; +#test1 +CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c)); +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8)); +#Check one row +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) +COLLATE utf8_general_ci; +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +#Check two rows +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) +COLLATE utf8_general_ci ORDER BY c; +DROP TABLE t1; +#test2 +CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c)); +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308); +#Check one row +SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci; +INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4); +#Check two rows +SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 +COLLATE ucs2_unicode_ci ORDER BY c; +DROP TABLE t1; +#test 3 +CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c)); +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8)); +#Check one row row +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci; +INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8)); +#Check two rows +SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) +COLLATE utf8_unicode_ci ORDER BY c; +DROP TABLE t1; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 25ee85d62b4..3bfc66029ce 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), } } +/* +** Calculate min_str and max_str that ranges a LIKE string. +** Arguments: +** ptr Pointer to LIKE string. +** ptr_length Length of LIKE string. +** escape Escape character in LIKE. (Normally '\'). +** All escape characters should be removed from min_str and max_str +** res_length Length of min_str and max_str. +** min_str Smallest case sensitive string that ranges LIKE. +** Should be space padded to res_length. +** max_str Largest case sensitive string that ranges LIKE. +** Normally padded with the biggest character sort value. +** +** The function should return 0 if ok and 1 if the LIKE string can't be +** optimized ! +*/ + +my_bool my_like_range_mb(CHARSET_INFO *cs, + const char *ptr,uint ptr_length, + pbool escape, pbool w_one, pbool w_many, + uint res_length, + char *min_str,char *max_str, + uint *min_length,uint *max_length) +{ + const char *end=ptr+ptr_length; + char *min_org=min_str; + char *min_end=min_str+res_length; + char *max_end=max_str+res_length; + + for (; ptr != end && min_str != min_end ; ptr++) + { + if (*ptr == escape && ptr+1 != end) + { + ptr++; /* Skip escape */ + *min_str++= *max_str++ = *ptr; + continue; + } + if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */ + { + char buf[10]; + uint buflen; + + /* Write min key */ + *min_length= (uint) (min_str - min_org); + *max_length=res_length; + do + { + *min_str++= (char) cs->min_sort_char; + } while (min_str != min_end); + + /* + Write max key: create a buffer with multibyte + representation of the max_sort_char character, + and copy it into max_str in a loop. + */ + buflen= cs->cset->wc_mb(cs, cs->max_sort_char, buf, buf + sizeof(buf)); + DBUG_ASSERT(buflen > 0); + do + { + if ((max_str + buflen) <= max_end) + { + /* Enough space for max characer */ + memcpy(max_str, buf, buflen); + max_str+= buflen; + } + else + { + /* + There is no space for whole multibyte + character, then add trailing spaces. + */ + + *max_str++= ' '; + } + } while (max_str != max_end); + return 0; + } + *min_str++= *max_str++ = *ptr; + } + *min_length= *max_length = (uint) (min_str - min_org); + + while (min_str != min_end) + *min_str++ = *max_str++ = ' '; /* Because if key compression */ + return 0; +} + static int my_wildcmp_mb_bin(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index cecc3be5045..edb84dbf225 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) int mblen; if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc, - scanner->sbeg, scanner->send)) < 0)) + scanner->sbeg, + scanner->send)) <= 0)) return -1; scanner->page= wc >> 8; @@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = my_strnncoll_ucs2_uca, my_strnncollsp_ucs2_uca, my_strnxfrm_ucs2_uca, - my_like_range_simple, + my_like_range_ucs2, my_wildcmp_uca, NULL, my_instr_mb, @@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler = my_strnncoll_any_uca, my_strnncollsp_any_uca, my_strnxfrm_any_uca, - my_like_range_simple, + my_like_range_mb, my_wildcmp_uca, NULL, my_instr_mb,