From f9d1a5a1c6505509d54edb46b5f2e6af20d14ff6 Mon Sep 17 00:00:00 2001 From: "bar@mysql.com" <> Date: Fri, 19 Nov 2004 14:25:25 +0400 Subject: [PATCH] Incorrect response with partial utf8 index --- mysql-test/include/ctype_common.inc | 56 +++++++++++++++++++++ mysql-test/r/ctype_uca.result | 57 ++++++++++++++++++++++ mysql-test/t/ctype_uca.test | 4 ++ strings/ctype-mb.c | 75 +++++++++++++++++------------ strings/ctype-uca.c | 32 ++++++------ 5 files changed, 178 insertions(+), 46 deletions(-) create mode 100644 mysql-test/include/ctype_common.inc diff --git a/mysql-test/include/ctype_common.inc b/mysql-test/include/ctype_common.inc new file mode 100644 index 00000000000..77937bdb854 --- /dev/null +++ b/mysql-test/include/ctype_common.inc @@ -0,0 +1,56 @@ +# +# Common tests for all character sets and collations. +# Include this file from a test with @test_characrer_set +# and @test_collation set to desired values. +# +# Please don't use SHOW CREATE TABLE in this file, +# we want it to be HANDLER independent. You can +# use SHOW FULL COLUMNS instead. +# +# Please surround all CREATE TABLE with --disable_warnings +# and --enable_warnings to be able to set storage_engine +# without having to check if the hanlder exists. + +SET @safe_character_set_server= @@character_set_server; +SET @safe_collation_server= @@collation_server; +SET character_set_server= @test_character_set; +SET collation_server= @test_collation; +CREATE DATABASE d1; +USE d1; + +# +# Bug 1883: LIKE did not work in some cases with a key. +# +--disable_warnings +CREATE TABLE t1 (c CHAR(10), KEY(c)); +--enable_warnings +# check the column was created with the expected charset/collation +SHOW FULL COLUMNS FROM t1; +INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); +SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; +DROP TABLE t1; + +# +# Bug 6643 incorrect response with partial utf8 index +# +--disable_warnings +CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); +--enable_warnings +# check the column was created with the expected charset/collation +SHOW FULL COLUMNS FROM t1; +INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); +SELECT c1 as want3results from t1 where c1 like 'l%'; +SELECT c1 as want3results from t1 where c1 like 'lo%'; +SELECT c1 as want1result from t1 where c1 like 'loc%'; +SELECT c1 as want1result from t1 where c1 like 'loca%'; +SELECT c1 as want1result from t1 where c1 like 'locat%'; +SELECT c1 as want1result from t1 where c1 like 'locati%'; +SELECT c1 as want1result from t1 where c1 like 'locatio%'; +SELECT c1 as want1result from t1 where c1 like 'location%'; +DROP TABLE t1; + +DROP DATABASE d1; +# Restore settings +USE test; +SET character_set_server= @safe_character_set_server; +SET collation_server= @safe_collation_server; diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 90681795513..7620b18eea6 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -2315,3 +2315,60 @@ HEX(CONVERT(col1 USING ucs2)) 064A06A9062F064A06AF0631 064A06A9064A DROP TABLE t1; +SET @test_character_set= 'utf8'; +SET @test_collation= 'utf8_swedish_ci'; +SET @safe_character_set_server= @@character_set_server; +SET @safe_collation_server= @@collation_server; +SET character_set_server= @test_character_set; +SET collation_server= @test_collation; +CREATE DATABASE d1; +USE d1; +CREATE TABLE t1 (c CHAR(10), KEY(c)); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c char(10) utf8_swedish_ci YES MUL NULL select,insert,update,references +INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); +SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; +want3results +aaa +aaaa +aaaaa +DROP TABLE t1; +CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); +SHOW FULL COLUMNS FROM t1; +Field Type Collation Null Key Default Extra Privileges Comment +c1 varchar(15) utf8_swedish_ci YES MUL NULL select,insert,update,references +INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); +SELECT c1 as want3results from t1 where c1 like 'l%'; +want3results +location +loberge +lotre +SELECT c1 as want3results from t1 where c1 like 'lo%'; +want3results +location +loberge +lotre +SELECT c1 as want1result from t1 where c1 like 'loc%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'loca%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locat%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locati%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'locatio%'; +want1result +location +SELECT c1 as want1result from t1 where c1 like 'location%'; +want1result +location +DROP TABLE t1; +DROP DATABASE d1; +USE test; +SET character_set_server= @safe_character_set_server; +SET collation_server= @safe_collation_server; diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index 708a31d637e..e640e6b53dc 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -435,3 +435,7 @@ INSERT INTO t1 VALUES (CONVERT(_ucs2 0x06280648062F0646062F USING utf8)); INSERT INTO t1 VALUES (CONVERT(_ucs2 0x06450647064506270646 USING utf8)); SELECT HEX(CONVERT(col1 USING ucs2)) FROM t1 ORDER BY col1 COLLATE utf8_persian_ci, col1 COLLATE utf8_bin; DROP TABLE t1; + +SET @test_character_set= 'utf8'; +SET @test_collation= 'utf8_swedish_ci'; +-- source include/ctype_common.inc diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 7d81766c4cb..3cdf7f460cd 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -443,6 +443,37 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), } } + +/* + Write max key: create a buffer with multibyte + representation of the max_sort_char character, + and copy it into max_str in a loop. +*/ +static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) +{ + char buf[10]; + char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, + (uchar*) buf + sizeof(buf)); + DBUG_ASSERT(buflen > 0); + do + { + if ((str + buflen) < end) + { + /* Enough space for the characer */ + memcpy(str, buf, buflen); + str+= buflen; + } + else + { + /* + There is no space for whole multibyte + character, then add trailing spaces. + */ + *str++= ' '; + } + } while (str < end); +} + /* ** Calculate min_str and max_str that ranges a LIKE string. ** Arguments: @@ -467,10 +498,15 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_str,char *max_str, uint *min_length,uint *max_length) { - const char *end=ptr+ptr_length; - char *min_org=min_str; - char *min_end=min_str+res_length; - char *max_end=max_str+res_length; + const char *end; + char *min_org= min_str; + char *min_end= min_str + res_length; + char *max_end= max_str + res_length; + uint charlen= my_charpos(cs, ptr, ptr+ptr_length, res_length/cs->mbmaxlen); + + if (charlen < ptr_length) + ptr_length= charlen; + end= ptr + ptr_length; for (; ptr != end && min_str != min_end ; ptr++) { @@ -482,16 +518,14 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, } if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */ { - char buf[10]; - uint buflen; - uint charlen= my_charpos(cs, min_org, min_str, res_length/cs->mbmaxlen); + charlen= my_charpos(cs, min_org, min_str, res_length/cs->mbmaxlen); if (charlen < (uint) (min_str - min_org)) min_str= min_org + charlen; /* Write min key */ *min_length= (uint) (min_str - min_org); - *max_length=res_length; + *max_length= res_length; do { *min_str++= (char) cs->min_sort_char; @@ -502,27 +536,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, representation of the max_sort_char character, and copy it into max_str in a loop. */ - buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, - (uchar*) buf + sizeof(buf)); - DBUG_ASSERT(buflen > 0); - do - { - if ((max_str + buflen) <= max_end) - { - /* Enough space for max characer */ - memcpy(max_str, buf, buflen); - max_str+= buflen; - } - else - { - /* - There is no space for whole multibyte - character, then add trailing spaces. - */ - - *max_str++= ' '; - } - } while (max_str != max_end); + pad_max_char(cs, max_str, max_end); return 0; } *min_str++= *max_str++ = *ptr; @@ -530,7 +544,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, *min_length= *max_length = (uint) (min_str - min_org); while (min_str != min_end) - *min_str++ = *max_str++ = ' '; /* Because if key compression */ + *min_str++= ' '; /* Because if key compression */ + pad_max_char(cs, max_str, max_end); return 0; } diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 91af7af0c54..8df5b3277c1 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -8567,7 +8567,7 @@ CHARSET_INFO my_charset_utf8_icelandic_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8594,7 +8594,7 @@ CHARSET_INFO my_charset_utf8_latvian_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8621,7 +8621,7 @@ CHARSET_INFO my_charset_utf8_romanian_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8648,7 +8648,7 @@ CHARSET_INFO my_charset_utf8_slovenian_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8675,7 +8675,7 @@ CHARSET_INFO my_charset_utf8_polish_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8702,7 +8702,7 @@ CHARSET_INFO my_charset_utf8_estonian_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8729,7 +8729,7 @@ CHARSET_INFO my_charset_utf8_spanish_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8756,7 +8756,7 @@ CHARSET_INFO my_charset_utf8_swedish_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8783,7 +8783,7 @@ CHARSET_INFO my_charset_utf8_turkish_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8810,7 +8810,7 @@ CHARSET_INFO my_charset_utf8_czech_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8838,7 +8838,7 @@ CHARSET_INFO my_charset_utf8_danish_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8865,7 +8865,7 @@ CHARSET_INFO my_charset_utf8_lithuanian_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8892,7 +8892,7 @@ CHARSET_INFO my_charset_utf8_slovak_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8919,7 +8919,7 @@ CHARSET_INFO my_charset_utf8_spanish2_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8946,7 +8946,7 @@ CHARSET_INFO my_charset_utf8_roman_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler, @@ -8973,7 +8973,7 @@ CHARSET_INFO my_charset_utf8_persian_uca_ci= NULL, /* ident_map */ 8, /* strxfrm_multiply */ 1, /* mbminlen */ - 2, /* mbmaxlen */ + 3, /* mbmaxlen */ 9, /* min_sort_char */ 0xFFFF, /* max_sort_char */ &my_charset_utf8_handler,